{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introductory examples"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.usa.gov data from bit.ly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.647822Z",
     "start_time": "2018-12-25T22:52:39.605597Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "u'/Users/thomas_young/Documents/git_download/pydata-book'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.676868Z",
     "start_time": "2018-12-25T22:52:39.651514Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.704719Z",
     "start_time": "2018-12-25T22:52:39.680005Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{ \"a\": \"Mozilla\\\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\\\/535.11 (KHTML, like Gecko) Chrome\\\\/17.0.963.78 Safari\\\\/535.11\", \"c\": \"US\", \"nk\": 1, \"tz\": \"America\\\\/New_York\", \"gr\": \"MA\", \"g\": \"A6qOVH\", \"h\": \"wfLQtf\", \"l\": \"orofrog\", \"al\": \"en-US,en;q=0.8\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\\\/\\\\/www.facebook.com\\\\/l\\\\/7AQEFzjSi\\\\/1.usa.gov\\\\/wfLQtf\", \"u\": \"http:\\\\/\\\\/www.ncbi.nlm.nih.gov\\\\/pubmed\\\\/22415991\", \"t\": 1331923247, \"hc\": 1331822918, \"cy\": \"Danvers\", \"ll\": [ 42.576698, -70.954903 ] }\\n'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first raw line; the with-block closes the file handle afterwards\n",
    "with open(path) as f:\n",
    "    first_line = f.readline()\n",
    "first_line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.889346Z",
     "start_time": "2018-12-25T22:52:39.708034Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n",
    "# Each line of the file is one JSON document; parse them inside a with-block\n",
    "# so the file handle is closed as soon as the records are loaded.\n",
    "with open(path) as f:\n",
    "    records = [json.loads(line) for line in f]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.939765Z",
     "start_time": "2018-12-25T22:52:39.899798Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',\n",
       " u'al': u'en-US,en;q=0.8',\n",
       " u'c': u'US',\n",
       " u'cy': u'Danvers',\n",
       " u'g': u'A6qOVH',\n",
       " u'gr': u'MA',\n",
       " u'h': u'wfLQtf',\n",
       " u'hc': 1331822918,\n",
       " u'hh': u'1.usa.gov',\n",
       " u'l': u'orofrog',\n",
       " u'll': [42.576698, -70.954903],\n",
       " u'nk': 1,\n",
       " u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',\n",
       " u't': 1331923247,\n",
       " u'tz': u'America/New_York',\n",
       " u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:39.982734Z",
     "start_time": "2018-12-25T22:52:39.949003Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "u'America/New_York'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[0]['tz']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:40.015707Z",
     "start_time": "2018-12-25T22:52:39.985681Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "America/New_York\n"
     ]
    }
   ],
   "source": [
    "print(records[0]['tz'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Counting time zones in pure Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:40.053982Z",
     "start_time": "2018-12-25T22:52:40.019220Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only records that carry a non-empty 'tz' value\n",
    "# (a missing key and an empty string are both skipped)\n",
    "time_zones = [rec['tz'] for rec in records if rec.get('tz')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:40.088248Z",
     "start_time": "2018-12-25T22:52:40.057104Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "def count_timezone(time_zones):\n",
    "    \"\"\"Tally how many times each value occurs in ``time_zones``.\n",
    "\n",
    "    Returns a ``defaultdict(int)`` mapping each distinct value to its count;\n",
    "    looking up a value that never occurred yields 0.\n",
    "    \"\"\"\n",
    "    tally = defaultdict(int)\n",
    "    for zone in time_zones:\n",
    "        tally[zone] = tally[zone] + 1\n",
    "    return tally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:40.124902Z",
     "start_time": "2018-12-25T22:52:40.090891Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "count_dict = count_timezone(time_zones=time_zones)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:41.924748Z",
     "start_time": "2018-12-25T22:57:41.895788Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(u'America/New_York', 1251),\n",
       " (u'America/Chicago', 400),\n",
       " (u'America/Los_Angeles', 382),\n",
       " (u'America/Denver', 191),\n",
       " (u'Europe/London', 74),\n",
       " (u'Asia/Tokyo', 37),\n",
       " (u'Pacific/Honolulu', 36),\n",
       " (u'Europe/Madrid', 35),\n",
       " (u'America/Sao_Paulo', 33),\n",
       " (u'Europe/Berlin', 28)]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ten most frequent time zones: order by count, then by name, both descending.\n",
    "# sorted() accepts any iterable, so the (name, count) pairs are passed directly.\n",
    "sorted(count_dict.items(), key=lambda k_v: (k_v[1], k_v[0]), reverse=True)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:42.898967Z",
     "start_time": "2018-12-25T22:57:42.858900Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(u'America/New_York', 1251),\n",
       " (u'America/Chicago', 400),\n",
       " (u'America/Los_Angeles', 382),\n",
       " (u'America/Denver', 191),\n",
       " (u'Europe/London', 74),\n",
       " (u'Asia/Tokyo', 37),\n",
       " (u'Pacific/Honolulu', 36),\n",
       " (u'Europe/Madrid', 35),\n",
       " (u'America/Sao_Paulo', 33),\n",
       " (u'Europe/Berlin', 28)]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from operator import itemgetter\n",
    "# Same ranking as above, using itemgetter(1) to pick the count out of each pair\n",
    "sorted(count_dict.items(), key=itemgetter(1), reverse=True)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:43.738075Z",
     "start_time": "2018-12-25T22:57:43.701082Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(u'America/New_York', 1251), (u'America/Chicago', 400), (u'America/Los_Angeles', 382), (u'America/Denver', 191), (u'Europe/London', 74), (u'Asia/Tokyo', 37), (u'Pacific/Honolulu', 36), (u'Europe/Madrid', 35), (u'America/Sao_Paulo', 33), (u'Europe/Berlin', 28)]\n"
     ]
    }
   ],
   "source": [
    "from collections import Counter\n",
    "counter = Counter(time_zones)\n",
    "print(counter.most_common(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:44.138134Z",
     "start_time": "2018-12-25T22:57:44.112522Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Counter?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:47.178835Z",
     "start_time": "2018-12-25T22:57:47.151999Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aaaaacccbbbbedd\n",
      "[5, 3, 4, 1, 2]\n"
     ]
    }
   ],
   "source": [
    "c = Counter('abcdeabcdabcaba')\n",
    "print(''.join(c.elements()))\n",
    "print(c.values())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Counting time zones with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:49.946187Z",
     "start_time": "2018-12-25T22:57:49.911817Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:50.496170Z",
     "start_time": "2018-12-25T22:57:50.460247Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "from numpy.random import randn\n",
    "import numpy as np\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "plt.rc('figure', figsize=(10, 6))\n",
    "np.set_printoptions(precision=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:52.074172Z",
     "start_time": "2018-12-25T22:57:51.922722Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n",
    "# Read inside a with-block so the file handle is closed deterministically\n",
    "with open(path) as f:\n",
    "    lines = f.readlines()\n",
    "records = [json.loads(line) for line in lines]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:52.243170Z",
     "start_time": "2018-12-25T22:57:52.205960Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11', u'c': u'US', u'nk': 1, u'tz': u'America/New_York', u'gr': u'MA', u'g': u'A6qOVH', u'h': u'wfLQtf', u'cy': u'Danvers', u'l': u'orofrog', u'al': u'en-US,en;q=0.8', u'hh': u'1.usa.gov', u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf', u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991', u't': 1331923247, u'hc': 1331822918, u'll': [42.576698, -70.954903]}, {u'a': u'GoogleMaps/RochesterNY', u'c': u'US', u'nk': 0, u'tz': u'America/Denver', u'gr': u'UT', u'g': u'mwszkS', u'h': u'mwszkS', u'cy': u'Provo', u'l': u'bitly', u'hh': u'j.mp', u'r': u'http://www.AwareMap.com/', u'u': u'http://www.monroecounty.gov/etc/911/rss.php', u't': 1331923249, u'hc': 1308262393, u'll': [40.218102, -111.613297]}, {u'a': u'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3)', u'c': u'US', u'nk': 1, u'tz': u'America/New_York', u'gr': u'DC', u'g': u'xxr3Qb', u'h': u'xxr3Qb', u'cy': u'Washington', u'l': u'bitly', u'al': u'en-US', u'hh': u'1.usa.gov', u'r': u'http://t.co/03elZC4Q', u'u': u'http://boxer.senate.gov/en/press/releases/031612.cfm', u't': 1331923250, u'hc': 1331919941, u'll': [38.9007, -77.043098]}, {u'a': u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.52.7 (KHTML, like Gecko) Version/5.1.2 Safari/534.52.7', u'c': u'BR', u'nk': 0, u'tz': u'America/Sao_Paulo', u'gr': u'27', u'g': u'zCaLwp', u'h': u'zUtuOu', u'cy': u'Braz', u'l': u'alelex88', u'al': u'pt-br', u'hh': u'1.usa.gov', u'r': u'direct', u'u': u'http://apod.nasa.gov/apod/ap120312.html', u't': 1331923249, u'hc': 1331923068, u'll': [-23.549999, -46.616699]}]\n"
     ]
    }
   ],
   "source": [
    "print(records[:4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:17:26.338561Z",
     "start_time": "2018-12-25T23:17:26.253528Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_heartbeat_</th>\n",
       "      <th>a</th>\n",
       "      <th>al</th>\n",
       "      <th>c</th>\n",
       "      <th>cy</th>\n",
       "      <th>g</th>\n",
       "      <th>gr</th>\n",
       "      <th>h</th>\n",
       "      <th>hc</th>\n",
       "      <th>hh</th>\n",
       "      <th>kw</th>\n",
       "      <th>l</th>\n",
       "      <th>ll</th>\n",
       "      <th>nk</th>\n",
       "      <th>r</th>\n",
       "      <th>t</th>\n",
       "      <th>tz</th>\n",
       "      <th>u</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...</td>\n",
       "      <td>en-US,en;q=0.8</td>\n",
       "      <td>US</td>\n",
       "      <td>Danvers</td>\n",
       "      <td>A6qOVH</td>\n",
       "      <td>MA</td>\n",
       "      <td>wfLQtf</td>\n",
       "      <td>1.331823e+09</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>orofrog</td>\n",
       "      <td>[42.576698, -70.954903]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://www.ncbi.nlm.nih.gov/pubmed/22415991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>GoogleMaps/RochesterNY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>US</td>\n",
       "      <td>Provo</td>\n",
       "      <td>mwszkS</td>\n",
       "      <td>UT</td>\n",
       "      <td>mwszkS</td>\n",
       "      <td>1.308262e+09</td>\n",
       "      <td>j.mp</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[40.218102, -111.613297]</td>\n",
       "      <td>0.0</td>\n",
       "      <td>http://www.AwareMap.com/</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>America/Denver</td>\n",
       "      <td>http://www.monroecounty.gov/etc/911/rss.php</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...</td>\n",
       "      <td>en-US</td>\n",
       "      <td>US</td>\n",
       "      <td>Washington</td>\n",
       "      <td>xxr3Qb</td>\n",
       "      <td>DC</td>\n",
       "      <td>xxr3Qb</td>\n",
       "      <td>1.331920e+09</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[38.9007, -77.043098]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>http://t.co/03elZC4Q</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://boxer.senate.gov/en/press/releases/0316...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...</td>\n",
       "      <td>pt-br</td>\n",
       "      <td>BR</td>\n",
       "      <td>Braz</td>\n",
       "      <td>zCaLwp</td>\n",
       "      <td>27</td>\n",
       "      <td>zUtuOu</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>1.usa.gov</td>\n",
       "      <td>NaN</td>\n",
       "      <td>alelex88</td>\n",
       "      <td>[-23.549999, -46.616699]</td>\n",
       "      <td>0.0</td>\n",
       "      <td>direct</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>America/Sao_Paulo</td>\n",
       "      <td>http://apod.nasa.gov/apod/ap120312.html</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...</td>\n",
       "      <td>en-US,en;q=0.8</td>\n",
       "      <td>US</td>\n",
       "      <td>Shrewsbury</td>\n",
       "      <td>9b6kNl</td>\n",
       "      <td>MA</td>\n",
       "      <td>9b6kNl</td>\n",
       "      <td>1.273672e+09</td>\n",
       "      <td>bit.ly</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bitly</td>\n",
       "      <td>[42.286499, -71.714699]</td>\n",
       "      <td>0.0</td>\n",
       "      <td>http://www.shrewsbury-ma.gov/selco/</td>\n",
       "      <td>1.331923e+09</td>\n",
       "      <td>America/New_York</td>\n",
       "      <td>http://www.shrewsbury-ma.gov/egov/gallery/1341...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _heartbeat_                                                  a  \\\n",
       "0          NaN  Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...   \n",
       "1          NaN                             GoogleMaps/RochesterNY   \n",
       "2          NaN  Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...   \n",
       "3          NaN  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...   \n",
       "4          NaN  Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...   \n",
       "\n",
       "               al   c          cy       g  gr       h            hc  \\\n",
       "0  en-US,en;q=0.8  US     Danvers  A6qOVH  MA  wfLQtf  1.331823e+09   \n",
       "1             NaN  US       Provo  mwszkS  UT  mwszkS  1.308262e+09   \n",
       "2           en-US  US  Washington  xxr3Qb  DC  xxr3Qb  1.331920e+09   \n",
       "3           pt-br  BR        Braz  zCaLwp  27  zUtuOu  1.331923e+09   \n",
       "4  en-US,en;q=0.8  US  Shrewsbury  9b6kNl  MA  9b6kNl  1.273672e+09   \n",
       "\n",
       "          hh   kw         l                        ll   nk  \\\n",
       "0  1.usa.gov  NaN   orofrog   [42.576698, -70.954903]  1.0   \n",
       "1       j.mp  NaN     bitly  [40.218102, -111.613297]  0.0   \n",
       "2  1.usa.gov  NaN     bitly     [38.9007, -77.043098]  1.0   \n",
       "3  1.usa.gov  NaN  alelex88  [-23.549999, -46.616699]  0.0   \n",
       "4     bit.ly  NaN     bitly   [42.286499, -71.714699]  0.0   \n",
       "\n",
       "                                                   r             t  \\\n",
       "0  http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...  1.331923e+09   \n",
       "1                           http://www.AwareMap.com/  1.331923e+09   \n",
       "2                               http://t.co/03elZC4Q  1.331923e+09   \n",
       "3                                             direct  1.331923e+09   \n",
       "4                http://www.shrewsbury-ma.gov/selco/  1.331923e+09   \n",
       "\n",
       "                  tz                                                  u  \n",
       "0   America/New_York        http://www.ncbi.nlm.nih.gov/pubmed/22415991  \n",
       "1     America/Denver        http://www.monroecounty.gov/etc/911/rss.php  \n",
       "2   America/New_York  http://boxer.senate.gov/en/press/releases/0316...  \n",
       "3  America/Sao_Paulo            http://apod.nasa.gov/apod/ap120312.html  \n",
       "4   America/New_York  http://www.shrewsbury-ma.gov/egov/gallery/1341...  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pandas import DataFrame, Series\n",
    "frame = DataFrame(records)\n",
    "frame.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:17:31.302593Z",
     "start_time": "2018-12-25T23:17:31.268627Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:55.474342Z",
     "start_time": "2018-12-25T22:57:55.424545Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3560 entries, 0 to 3559\n",
      "Data columns (total 18 columns):\n",
      "_heartbeat_    120 non-null float64\n",
      "a              3440 non-null object\n",
      "al             3094 non-null object\n",
      "c              2919 non-null object\n",
      "cy             2919 non-null object\n",
      "g              3440 non-null object\n",
      "gr             2919 non-null object\n",
      "h              3440 non-null object\n",
      "hc             3440 non-null float64\n",
      "hh             3440 non-null object\n",
      "kw             93 non-null object\n",
      "l              3440 non-null object\n",
      "ll             2919 non-null object\n",
      "nk             3440 non-null float64\n",
      "r              3440 non-null object\n",
      "t              3440 non-null float64\n",
      "tz             3440 non-null object\n",
      "u              3440 non-null object\n",
      "dtypes: float64(4), object(14)\n",
      "memory usage: 500.7+ KB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "# info() prints its report itself and returns None, so it must not be wrapped\n",
    "# in print() (that would append a stray 'None' line to the output)\n",
    "frame.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:57.314128Z",
     "start_time": "2018-12-25T22:57:57.272256Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "America/New_York       1251\n",
       "                        521\n",
       "America/Chicago         400\n",
       "America/Los_Angeles     382\n",
       "America/Denver          191\n",
       "Europe/London            74\n",
       "Asia/Tokyo               37\n",
       "Pacific/Honolulu         36\n",
       "Name: tz, dtype: int64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame['tz'].value_counts()[:8]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:57.538110Z",
     "start_time": "2018-12-25T22:57:57.498500Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# np.nan is the documented spelling of the NaN constant\n",
    "np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:57:57.759206Z",
     "start_time": "2018-12-25T22:57:57.721496Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# np.nan is the documented spelling of the NaN constant\n",
    "np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:42.592269Z",
     "start_time": "2018-12-25T22:52:42.540186Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "America/New_York       1251\n",
       "Uknown                  521\n",
       "America/Chicago         400\n",
       "America/Los_Angeles     382\n",
       "America/Denver          191\n",
       "Missing                 120\n",
       "Europe/London            74\n",
       "Asia/Tokyo               37\n",
       "Pacific/Honolulu         36\n",
       "Europe/Madrid            35\n",
       "Name: tz, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label absent time zones 'Missing' and empty-string time zones 'Unknown'\n",
    "clean_tz = frame.tz.fillna('Missing')\n",
    "clean_tz[clean_tz == ''] = 'Unknown'\n",
    "clean_tz.value_counts()[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:52:05.580297Z",
     "start_time": "2018-12-25T23:52:05.178249Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n",
      "0     America/New_York\n",
      "1       America/Denver\n",
      "2     America/New_York\n",
      "3    America/Sao_Paulo\n",
      "4     America/New_York\n",
      "5     America/New_York\n",
      "6        Europe/Warsaw\n",
      "7               Uknown\n",
      "8               Uknown\n",
      "9               Uknown\n",
      "Name: tz, dtype: object\n",
      "('tz_count', <class 'pandas.core.series.Series'>)\n",
      "America/New_York       1251\n",
      "Uknown                  521\n",
      "America/Chicago         400\n",
      "America/Los_Angeles     382\n",
      "America/Denver          191\n",
      "Missing                 120\n",
      "Europe/London            74\n",
      "Asia/Tokyo               37\n",
      "Pacific/Honolulu         36\n",
      "Europe/Madrid            35\n",
      "Name: tz, dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x11d3f0890>"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAArEAAAFpCAYAAACLX3DyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2YnVV97//3h6QmqCWCUE6KSNCGIhAeJCAKKCgoND2F\n/kAkRQWrUnoo1NOfPY1FK5yfnqbVVqAqR0SB4gNW0cIBRRDLgygPCeQBELRCrHBoBcSogCjh+/tj\nrynbYSYzSSbM3DPv13XNNfde97rX/b0X4vVh7bX3pKqQJEmSumST8S5AkiRJWleGWEmSJHWOIVaS\nJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHXO9PEuQBvf\nlltuWXPmzBnvMiRJkka0dOnSB6tqq5H6GWKngDlz5rBkyZLxLkOSJGlESb4/mn5uJ5AkSVLnGGIl\nSZLUOYZYSZIkdY4hVpIkSZ1jiJUkSVLnGGIlSZLUOX7F1hSw8r7VzFl02Tpft2rxgo1QjSRJ0oZz\nJVaSJEmdY4iVJElS5xhiJUmS1DmdCbFJ1iRZ1vezaJzrWZTkmCSnJqkkv9V37h2tbf46jHdAkkuH\nOTc/yZnDnFuVZMt1fwJJkqTu6tIHux6rqt3X58Ik06vqiTGu53XAUcBcYCVwNPC+du71wO1jcZNW\n+xJgyViMJ0mSNBl0ZiV2OP0rkW3F8up2fGqSC5JcD1yQZGaSc5OsTHJrkgNbv+OSXJzk6iTfTfLe\nvrHfmOSmtvL7sSTTWvtmwLOq6oHW9Z+Bw9q5FwOrgQf7xjkryZIktyc5ra/9kCR3JrkF+H/62gfX\n/p+rtEmen+SKNtY5QMZ6TiVJkia6LoXYTQdtJ3jDKK7ZCTioqhYCJwJVVfOAhcD5SWa2fnsDRwC7\nAq9vYfglwBuAfdsK8BrgmNb/IOCqvvv8BPhBkl3orch+blAdp1TV/Db+q5Ls2u79ceC/AnsC/2Ut\ntfd7L/CNqtoZ+BLwwlHMgyRJ0qQy2bcTXFJVj7Xj/YB/AKiqO5N8H9ihnbuyqh4CSPLF1vcJeuHy\n5iQAmwI/bP0PAc4ddK8L6QXY1wGvAd7Sd+6oJMfTm+/Z9ALqJsA9VfXddt9PAccPU3u/V9JWbavq\nsiQPD/Xg7X7HA0zbbKuhukiSJHVWl0LscJ7gqRXlmYPOPTLKMWqI1wHOr6p3DdF/b+CPB7VdCnwA\nWFJVP2nBlyTbA+8E9qqqh5OcN0SdQxlt7UOqqrOBswFmzJ47+PkkSZI6rUvbCYazit6KKfS2BAzn\nOtp2gCQ70Hsb/q527uAkWyTZFDgcuJ7edoEjk/xGu2aLJNsl2Rm4s6rW9A9eVY8CfwG8f9B9N6MX\nSFcn2Ro4tLXfCcxpe2iht8VhNK4F/qDVdCiw+SivkyRJmjS6tBK7aZJlfa8vr6pFwGnAJ5L8f8DV\na7n+o8BZSVbSW709rqoebyumNwEXAS8APtW+DYAk7wauSLIJ8Et6+2r3Ay4f6gZVdeEQbcuT3Eov\ntP6AXkCmqn7e3vK/LMmj9EL2r49iHk4DPpvkduCbwL+N4hpJkqRJJVVT+53mJMcB86vqT0bZ/0rg\nzVV1/0YtbAzNmD23Zh97+jpft2rxgo1QjSRJ0vCSLG0fiF+rLq3ETghVdfB41yBJkjTVTfkQW1Xn\nAeeNcxmSJElaB5Phg12SJEmaYqb8SuxUMG+bWSxxf6skSZpEXImVJElS5xhiJUmS1DmGWEmSJHWO\nIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaS\nJEmdY4iVJElS5xhiJUmS1DmGWEmSJHXO9PEuQBvfyvtWM2fRZet17arFC8a4GkmSpA3nSqwkSZI6\nxxArSZKkzjHESpIkqXMMsZIkSeqcCR
Nik6xJsizJbUk+n+TZ6zHG7yVZ1I63SnJjkluT7J/ky0me\nN8L1s5NckWROktsGnTs1yTvXtaYR7ndAkktH0e9nY3lfSZKkrpswIRZ4rKp2r6pdgF8AJ6zrAFV1\nSVUtbi9fA6ysqj2q6rqq+p2q+vEIQxwCfHVd7ytJkqRn1kQKsf2uA34LIMk/J1ma5PYkxw90SHJI\nkluSLE9yVWs7LsmHk+wO/C1wWFvd3TTJqiRbtn5vTrKiXXtB330PAb4yUnFJdk9yQxvjS0k2b+1X\nJ/mbJDcl+U6S/Vv7zCTnJlnZVoYPHGLMX1npbSvScwb1+ZWV2/asx41UryRJ0mQz4b4nNsl04FDg\n8tb0h1X1oySbAjcnuYhe+P448MqquifJFv1jVNWyJH8FzK+qP2njDoy/M/Bu4BVV9eDAtUmmAb9d\nVXe08PjiJMv6hv0vwAfb8T8CJ1XVNUn+J/Be4B3t3PSq2jvJ77T2g4ATe2XVvCQ7Alck2WEMpkuS\nJGlKmkghdtO+0Hgd8Il2fHKS32/H2wJzga2Aa6vqHoCq+tE63OfVwOer6sFB174MuLGv3/eqaveB\nF0lObb9nAc+rqmvaqfOBz/dd98X2eykwpx3vB/xDu9+dSb4PbNQQ21atjweYttlWG/NWkiRJz7iJ\nFGIf6w+N0Hv7nN5K5sur6tEkVwMzN9L9+1d/N8Tj7fca1m1+n+BXt3cM9Zyj6QNAVZ0NnA0wY/bc\nWoc6JEmSJryJuid2wCzg4RZgdwT2ae03AK9Msj3A4O0EI/g68Pokzx907WuAr410cVWtBh4e2O8K\nvAm4Zi2XQG9l+Zh2vx2AFwJ3DeqzCnhp6/NSYPshxvk+sFOSGe2bFl4zUr2SJEmT0URaiR3K5cAJ\nSb5NL/TdAFBVD7S3y7+YZBPgh8DBoxmwqm5P8n7gmiRrgFuT/Dnw86r66SjrOhb43+1rwO4G3jJC\n/48CZyVZSW819biqenxgn25zEfDmJLfT29bwnSFq/0GSfwJuA+4Bbh1lvZIkSZNKqnynOckbgRf0\nfT3XpDJj9tyafezp63XtqsULxrgaSZKk4SVZWlXzR+o30VdinxFV9anxrkGSJEmjN9H3xEqSJElP\nY4iVJElS57idYAqYt80slri3VZIkTSKuxEqSJKlzDLGSJEnqHEOsJEmSOscQK0mSpM4xxEqSJKlz\nDLGSJEnqHEOsJEmSOscQK0mSpM4xxEqSJKlzDLGSJEnqHEOsJEmSOscQK0mSpM4xxEqSJKlzDLGS\nJEnqHEOsJEmSOmf6eBegjW/lfauZs+iy9bp21eIFY1yNJEnShnMlVpIkSZ1jiJUkSVLnGGIlSZLU\nOVM2xCY5PEkl2XGEfl9O8rxRjLcoySlJlrWfNX3HJ6/luk8lOXx9nkGSJGmqmsof7FoIfKP9fu9w\nnarqd0Y53uuAo6rq/QBJflZVu29wlZIkSXqaKbkSm+S5wH7AW4GjW9vsJNe2ldPbkuzf2lcl2bId\n/3OSpUluT3J833ibAc+qqgfWcs/tk/xLkhVJrkzygiH6/HWSTyR5bZIv9LUfmuTz7fiNSVa2Gv/X\n2MyIJElSt0zJEAscBlxeVd8BHkqyJ/AHwFfb6uluwLIhrvvDqtoTmA+cnOT5rf0g4KoR7vlR4Jyq\n2hX4PHB6/8kkHwI2A94GfA3YtW/8twCfbMH3fcCBwB7Avkl+dx2eW5IkaVKYqiF2IXBhO76wvb4Z\neEuSU4F5VfXTIa47Ocly4AZgW2Buaz8E+MoI93xZ3z3/Edi/79xpwIyqOrF6ngQ+DfxBki2APYEr\n2hhfr6oHq+qXwGeAVw51syTHJ1mSZMmaR1ePUJokSVK3TLk9sS0UvhqYl6SAaUABf04vEC4Azkvy\n91X1j33XHUBvxfXlVfVokquBme303sAfb0BZNwF7Jdm8qh5ubZ8ELmrHn6uqNUlGPWBVnQ2cDTBj\n9t
zagNokSZImnKm4EnskcEFVbVdVc6pqW+AeegH2P6rq48A5wEsHXTcLeLgF2B2BfQCS7AzcWVVr\nRrjvDcBR7fiNwLV95y4D/g64tO3Xpap+ADwILALOa/1uBA5M8vwk0+nt571mnZ5ekiRpEphyK7H0\ntg78zaC2i+gFxUeS/BL4GfDmQX0uB05I8m3gLnqhFODQdm4kJ9Lb1/ou4D/o7XP9T1V1YZJfBy5O\nsqCqfk5vu8Bmbe8uVXVvkvcAVwMB/k9Vrd/fk5UkSeqwVPlO84ZIciXw5qq6fyOM/b+Bb1XV+Rsy\nzozZc2v2saeP3HEIqxYv2JBbS5IkrZMkS6tq/kj9puJK7JiqqoM3xrhJlgEPA8P+oQRJkqSpyhA7\nQfmHEiRJkoY3FT/YJUmSpI5zJXYKmLfNLJa4t1WSJE0irsRKkiSpcwyxkiRJ6hxDrCRJkjrHECtJ\nkqTOMcRKkiSpcwyxkiRJ6hxDrCRJkjrHECtJkqTOMcRKkiSpcwyxkiRJ6hxDrCRJkjrHECtJkqTO\nMcRKkiSpcwyxkiRJ6hxDrCRJkjrHECtJkqTOmT7eBWjjW3nfauYsumxc7r1q8YJxua8kSZrcXImV\nJElS5xhiJUmS1DmGWEmSJHXOhN4Tm2QNsLKv6cKqWjyO9SwCfgDMBX5WVR8cw7HnAJdW1S5jNaYk\nSdJkNaFDLPBYVe2+PhcmmV5VT4xxPa8DjqIXYiVJkjROOrmdIMmqJFu24/lJrm7Hpya5IMn1wAVJ\nZiY5N8nKJLcmObD1Oy7JxUmuTvLdJO/tG/uNSW5KsizJx5JMa+2bAc+qqgfWUtefJbmt/byjtc1J\n8u0kH09ye5Irkmzazu2ZZHmS5cCJfeOsre4vJrm81f23YzuzkiRJ3TDRQ+ymLUwO/LxhFNfsBBxU\nVQvpBcOqqnnAQuD8JDNbv72BI4Bdgde3MPwS4A3Avm0FeA1wTOt/EHDVcDdNsifwFuBlwD7A25Ps\n0U7PBT5SVTsDP273BTgXOKmqdhs03Nrq3r3VOA94Q5JtRzEnkiRJk8pk3E5wSVU91o73A/4BoKru\nTPJ9YId27sqqegggyRdb3yeAPYGbkwBsCvyw9T+EXugczn7Al6rqkb4x9wcuAe6pqmWt31JgTpLn\nAc+rqmtb+wXAoaOo+6qqWt3ucQewHb19ur8iyfHA8QDTNttqLWVLkiR1z0QPscN5gqdWkWcOOvfI\nKMeoIV4HOL+q3jVE/72BPx51hb/q8b7jNfTC8foaPNaQ/wyr6mzgbIAZs+cOflZJkqROm+jbCYaz\nit6KKTz11vxQrqNtB0iyA/BC4K527uAkW7T9qYcD19PbLnBkkt9o12yRZLskOwN3VtWaEe51eJJn\nJ3kO8PutbUhV9WPgx0n2a03H9J1eW92SJElT3kQPsYP3xA58vdZpwBlJltBbjRzOR4FNkqwEPgcc\nV1UDK5k3ARcBK4CLqmpJVd0BvBu4IskK4EpgNr23+S8fNPa7k9w78FNVtwDntXFvBM6pqltHeL63\nAB9JsozeKvBo6pYkSZryUjX13mlOchwwv6r+ZJT9rwTeXFX3b9TCNpIZs+fW7GNPH5d7r1q8YFzu\nK0mSuinJ0qqaP1K/ru6JfUZV1cHjXYMkSZKeMiVDbFWdR++tf0mSJHXQRN8TK0mSJD3NlFyJnWrm\nbTOLJe5NlSRJk4grsZIkSeocQ6wkSZI6xxArSZKkzjHESpIkqXMMsZIkSeocQ6wkSZI6xxArSZKk\nzjHESpIkqXMMsZIkSeocQ6wkSZI6xxArSZKkzjHESpIkqXMMsZIkSeocQ6wkSZI6xxArSZKkzpk+\n3gVo41t532rmLLpsvMsYE6sWLxjvEiRJ0gTgSqwkSZI6xxArSZKkzjHESpIkqXMMseshSSX5VN/r\n6UkeSHJpe/17SRatx7jfHMs6JUmSJis/2LV+HgF2SbJpVT0GHAzc
N3Cyqi4BLlnXQavqFWNXoiRJ\n0uTlSuz6+zIw8FH5hcBnB04kOS7Jh9vx65PclmR5kmtb285JbkqyLMmKJHNb+8/a7wOSXJ3kC0nu\nTPLpJGnnfqe1LU1y5sDqryRJ0lRiiF1/FwJHJ5kJ7ArcOEy/vwJeV1W7Ab/X2k4Azqiq3YH5wL1D\nXLcH8A5gJ+BFwL7tXh8DDq2qPYGtxuphJEmSusQQu56qagUwh94q7JfX0vV64LwkbwemtbZvAX+Z\n5C+A7dqWhMFuqqp7q+pJYFm7147A3VV1T+vz2SGuAyDJ8UmWJFmy5tHV6/BkkiRJE58hdsNcAnyQ\ntYTJqjoBeDewLbA0yfOr6jP0VmUfA76c5NVDXPp43/Ea1nH/clWdXVXzq2r+tGfPWpdLJUmSJjw/\n2LVhPgn8uKpWJjlgqA5JXlxVNwI3JjkU2DbJLHorqmcmeSG97QhfH8X97gJelGROVa0C3jAmTyFJ\nktQxhtgNUFX3AmeO0O0D7YNbAa4ClgN/AbwpyS+Bfwf+1yjv91iS/wZcnuQR4Ob1Ll6SJKnDUlXj\nXYPWQZLnVtXP2rcVfAT4blV9aG3XzJg9t2Yfe/ozU+BGtmrxgpE7SZKkzkqytKrmj9TPPbHd8/Yk\ny4DbgVn0vq1AkiRpSnE7Qce0Vde1rrxKkiRNdq7ESpIkqXMMsZIkSeoctxNMAfO2mcUSPxAlSZIm\nEVdiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhi\nJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdM328C9DGt/K+1cxZ\ndNl4l9FpqxYvGO8SJElSH1diJUmS1DmGWEmSJHWOIVaSJEmdM6FCbJLDk1SSHTfS+POTnLkB1x+d\n5JQkxyV5IMmtSb6b5KtJXjGWtUqSJGl4EyrEAguBb7TfYyrJ9KpaUlUnb8AwhwKXt+PPVdUeVTUX\nWAx8MclLNrjQdZTED+dJkqQpZ8KE2CTPBfYD3goc3doOSHJNkouT3J1kcZJjktyUZGWSF7d+WyW5\nKMnN7Wff1n5qkguSXA9c0Ma7dOB+Sc5t46xIckRrPyvJkiS3Jzmtr74AuwO3DK69qv4FOBs4vvV9\ncZLLkyxNct3AynKS85KcmeSb7XmObO0XJlnQd6/zkhyZZFqSD7RnWpHkj/rm5boklwB3jOk/CEmS\npA6YSKt4hwGXV9V3kjyUZM/WvhvwEuBHwN3AOVW1d5I/BU4C3gGcAXyoqr6R5IXAV9s1ADsB+1XV\nY0kO6Lvfe4DVVTUPIMnmrf2UqvpRkmnAVUl2raoVwB7A8qqqXp59mluAP2rHZwMnVNV3k7wM+Cjw\n6nZuNr2wviNwCfAF4HPAUcBlSZ4FvAb4Y3qBfnVV7ZVkBnB9kivaOC8Fdqmqe0Y1u5IkSZPIRAqx\nC+mFUYAL2+tLgZur6n6AJN8DBkLcSuDAdnwQsFNfuNysrewCXFJVjw1xv4NoK74AVfVwOzwqyfH0\n5mY2vRC8AjgE+Mpa6k+r8bnAK4DP99Uzo6/fP1fVk8AdSbZubV8BzmhB9RDg2ha6XwvsOrBiC8wC\n5gK/AG5aW4Btz3A8wLTNtlpL2ZIkSd0zIUJski3orVTOS1LANKCAy4DH+7o+2ff6SZ6qfxNgn6r6\n+aBxAR5Zhzq2B94J7FVVDyc5D5jZTr8WOGItl+8BfLvV8uOq2n2Yfv3PE4Cq+nmSq4HXAW+gF+IH\nzp9UVV8dVOcBjPBcVXU2vRVhZsyeW2vrK0mS1DUTZU/skcAFVbVdVc2pqm2Be4D9R3n9FfS2FgCQ\nZLgA2e9K4MS+azYHNqMXDle3VdJD27lZwPSqemiogZK8it6q58er6ifAPUle384lyW6jqOdzwFvo\nPfPAh8e+Cvxxkl9rY+2Q5Dmj
GEuSJGlSmyghdiHwpUFtFzH6byk4GZjfPvx0B3DCKK55H7B5ktuS\nLAcOrKrlwK3AncBngOtb34OBrw26/g1JliX5DvCXwBFV9e127hjgrW3c2+nt9x3JFcCrgK9V1S9a\n2zn0Prh1S5LbgI8xQVbPJUmSxlOqfKd5JEnOofeBshvGu5b1MWP23Jp97OnjXUanrVq8YOROkiRp\ngyVZWlXzR+rnqt4oVNXbxrsGSZIkPWWibCeQJEmSRs0QK0mSpM5xO8EUMG+bWSxxT6ckSZpEXImV\nJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS\n5xhiJUmS1DmGWEmSJHWOIVaSJEmdY4iVJElS5xhiJUmS1DmGWEmSJHXO9PEuQBvfyvtWM2fRZeNd\nhqa4VYsXjHcJkqRJxJVYSZIkdY4hVpIkSZ1jiJUkSVLnGGIlSZLUOYZYSZIkdc6oQmySw5NUkh03\nRhFJ5ic5cwOuPzrJKUmOS/Lhsayt7x7TkzyQZPHGGL/dY1WSLTfW+JIkSZPFaFdiFwLfaL/HVJLp\nVbWkqk7egGEOBS4fq5qGcTDwHeD1SbKR7yVJkqS1GDHEJnkusB/wVuDo1nZAkmuSXJzk7iSLkxyT\n5KYkK5O8uPXbKslFSW5uP/u29lOTXJDkeuCCNt6lA/dLcm4bZ0WSI1r7WUmWJLk9yWl99QXYHbhl\nLc+wsI13W5K/aW3TkpzX2lYm+e8jTMVC4Azg34CX9429KslpSW5p4+zY9+xXtnrPSfL9gVXWJG9s\nc7UsyceSTBui5qf1WY+aJUmSJqXRrMQeBlxeVd8BHkqyZ2vfDTgBeAnwJmCHqtobOAc4qfU5A/hQ\nVe0FHNHODdgJOKiqBq/uvgdYXVXzqmpX4Out/ZSqmg/sCrwqya6tfQ9geVXVUMUn+U3gb4BX0wu7\neyU5vB1vU1W7VNU84NzhJiDJTOAg4P8An+XpK9IPVtVLgbOAd7a29wJfr6qdgS8AL2xjvQR4A7Bv\nVe0OrAGOGXS/4fqsS83Ht9C/ZM2jq4frJkmS1EmjCbELgQvb8YU8FeBurqr7q+px4HvAFa19JTCn\nHR8EfDjJMuASYLO2sgtwSVU9NsT9DgI+MvCiqh5uh0cluQW4FdiZXggGOAT4ylrq3wu4uqoeqKon\ngE8DrwTuBl6U5B+SHAL8ZC1j/C7wL63ei4DDB62efrH9Xtr37PvR5q2qLgcGnuM1wJ7AzW1eXgO8\naND9husz6pqr6uyqml9V86c9e9ZaHk2SJKl71vpnZ5NsQW8Fc16SAqYBBVwGPN7X9cm+10/2jbsJ\nsE9V/XzQuACPjLbIJNvTW+Hcq6oeTnIeMLOdfi29Vd510sbZDXgdvRXlo4A/HKb7QmC/JKva6+fT\nm5cr2+uBZ1/DyH/KN8D5VfWu9emzDjVLkiRNWiOtxB4JXFBV21XVnKraFrgH2H+U41/BU1sLSLL7\nKK65Ejix75rNgc3ohd7VSbam90EukswCplfVQ2sZ7yZ62w+2bKunC4Fr2v7UTarqIuDdwEuHujjJ\nZvSe94VtDua0+kb6kNv19EImSV4LbN7arwKOTPIb7dwWSbYbdO2QfUZbsyRJ0mQ3UohdCHxpUNtF\njP5bCk4G5rcPaN1Bb/VwJO8DNm8fXloOHFhVy+ltI7gT+Ay9gAi9bwz42qDrj0ty78APvdXjRcC/\nAMuBpVV1MbANcHV7u/5TwHAro79Pb29r/8rzxcB/TTJjLc9xGvDaJLcBrwf+HfhpVd1BL4BekWQF\nvdA+u//CtfQZbc2SJEmTWob5PFQnJDkHOKeqbhjvWgZrAXdNVT2R5OXAWe1DWs+4GbPn1uxjTx+P\nW0v/adXiBeNdgiSpA5IsbR/mX6uR9m9OaFX1tvGuYS1eCPxTkk2AXwBvH+d6JEmSJo1Oh9ixlu
Qj\nwL6Dms+oqmG/ymo4VfVdel//JUmSpDHW6e0EGp358+fXkiVLxrsMSZKkEY12O8Fo/+ysJEmSNGEY\nYiVJktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucYYiVJ\nktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucYYiVJktQ5hlhJkiR1zvTxLkAb38r7VjNn0WXjXYY0rFWL\nF4x3CZKkjnElVpIkSZ1jiJUkSVLnGGIlSZLUOeMaYpMcnqSS7LiRxp+f5MwNuP7oJKe040OTLEly\nR5Jbk/xdaz8vyZFDXPubSb6w/tVLkiRpOOO9ErsQ+Eb7PaaSTK+qJVV18gYMcyhweZJdgA8Db6yq\nnYD5wL+u7cKq+r9V9bRwK0mSpA03biE2yXOB/YC3Ake3tgOSXJPk4iR3J1mc5JgkNyVZmeTFrd9W\nSS5KcnP72be1n5rkgiTXAxe08S4duF+Sc9s4K5Ic0drPaiustyc5ra++ALsDtwD/A3h/Vd0JUFVr\nquqsvsd5ZZJvtpqPbNfPSXJbO56W5INJbmv3Pqm1/1Wr/7YkZ7d7kmSv1m9Zkg/0jTOz7xluTXLg\nRvmHI0mSNMGN50rsYcDlVfUd4KEke7b23YATgJcAbwJ2qKq9gXOAk1qfM4APVdVewBHt3ICdgIOq\navDq7nuA1VU1r6p2Bb7e2k+pqvnArsCrkuza2vcAlldVAbsAS9fyLLPpBfLfBRYPcf54YA6we7v3\np1v7h6tqr6raBdi0XQ9wLvBHVbU7sKZvnBOBqqp59Favz08ycy11SZIkTUrjGWIXAhe24wt5akvB\nzVV1f1U9DnwPuKK1r6QXBAEOAj6cZBlwCbBZW9kFuKSqHhvifgcBHxl4UVUPt8OjktwC3ArsTC8E\nAxwCfGWUz/LPVfVkVd0BbD3MvT9WVU+0e/+otR+Y5MYkK4FXAzsneR7w61X1rdbnM33j7Ad8qo1x\nJ/B9YIehCkpyfFthXrLm0dWjfAxJkqRuGJc/dpBkC3qhbV6SAqYBBVwGPN7X9cm+10/yVL2bAPtU\n1c8HjQvwyDrUsT3wTmCvqno4yXnAwMrma+mt8gLcDuwJLB9mqP6aM8p7zwQ+Csyvqh8kObXv3hus\nqs4GzgaYMXtujdW4kiRJE8F4rcQeCVxQVdtV1Zyq2ha4B9h/lNdfwVNbC0iy+yiuuZLe2/ED12wO\nbEYv9K5OsjW9D3KRZBYwvaoeat0/APxlkh3a+U2SnDDKWgfu/UdJprfrt+CpwPpgW0U+EqCqfgz8\nNMnL2vlgP6gBAAANnklEQVSj+8a5DjimjbED8ELgrnWoQ5IkaVIYrxC7EPjSoLaLGP23FJwMzG8f\nfrqD3h7akbwP2Lx9iGo5cGBVLae3jeBOem/bX9/6Hgx8beDCqloBvAP4bJJvA7cBLxplrdDbs/tv\nwIp27z9oYfXjbayvAjf39X8r8PG2XeI5wMB+gI8Cm7TtB58DjmvbLiRJkqaU9D63pH5JzgHOqaob\nxun+z62qn7XjRcDsqvrT9R1vxuy5NfvY08esPmmsrVq8YLxLkCRNEEmWtg/dr9W47Imd6KrqbeNc\nwoIk76L3z+f7wHHjW44kSdLEYoidgKrqc/S2C0iSJGkI4/0XuyRJkqR15krsFDBvm1kscc+hJEma\nRFyJlSRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnWOI\nlSRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnTN9vAvQ\nxrfyvtXMWXTZeJchTRmrFi8Y7xIkadJzJVaSJEmdY4iVJElS5xhiJUmS1DmG2FFKMifJbYPaTk3y\nziRXJ5k/XrVJkiRNNYZYSZIkdY4hdgwl2STJeUne117/LM
n7kyxPckOSrVv7nCRfT7IiyVVJXphk\nWpJ70vO8JGuSvLL1vzbJ3Lby+8m28nt3kpPH83klSZLGiyF27EwHPg18t6re3dqeA9xQVbsB1wJv\nb+3/AJxfVbu2a86sqjXAXcBOwH7ALcD+SWYA21bVd9u1OwKvA/YG3pvk1zb+o0mSJE0shtjRqxHa\nPwbcVlXv7zv3C+DSdrwUmNOOXw58ph1fQC+0AlwHvLL9/HVr3wu4uW/My6rq8ap6EPghsPVQRSU5\nPsmSJEvWPLp65KeTJEnqEEPs6D0EbD6obQvgwXb8TeDAJDP7zv+yqgZC7hpG/uMS1wL701tl/TLw\nPOAAeuF2wON9x8OOWVVnV9X8qpo/7dmzRritJElStxhiR6mqfgbcn+TVAEm2AA4BvtG6fIJe8Pyn\nJCOF1W8CR7fjY3gqpN4EvAJ4sqp+DiwD/oheuJUkSVJjiF03bwbek2QZ8HXgtKr63sDJqvp74Fbg\ngiRrm9uTgLckWQG8CfjTdv3jwA+AG1q/64BfB1aO9YNIkiR1WZ56t1uT1YzZc2v2saePdxnSlLFq\n8YLxLkGSOivJ0qoa8fv3XYmVJElS5xhiJUmS1DmGWEmSJHXOSJ+i1yQwb5tZLHGPniRJmkRciZUk\nSVLnGGIlSZLUOYZYSZIkdY4hVpIkSZ1jiJUkSVLnGGIlSZLUOYZYSZIkdY4hVpIkSZ1jiJUkSVLn\nGGIlSZLUOYZYSZIkdY4hVpIkSZ1jiJUkSVLnGGIlSZLUOYZYSZIkdc708S5AG9/K+1YzZ9Fl412G\nJEnqqFWLF4x3CU/jSqwkSZI6xxArSZKkzjHESpIkqXMMsZIkSeqcjRZikxyepJLsuJHGn5/kzA24\n/ugkpyQ5LsmTSXbtO3dbkjljUWffmG9P8rm+15sl+V6SF63DGJ9KcvhY1iVJktRFG3MldiHwjfZ7\nTCWZXlVLqurkDRjmUODydnwvcMqGV7ZW5wDbJjmovf6fwCer6u7RXJzEb5KQJElqNkqITfJcYD/g\nrcDRre2AJNckuTjJ3UkWJzkmyU1JViZ5ceu3VZKLktzcfvZt7acmuSDJ9cAFbbxLB+6X5Nw2zook\nR7T2s5IsSXJ7ktP66guwO3BLa7oU2DnJbw/xLK9N8q0ktyT5fLvXXkm+2M4fluSxJM9KMjPJkKG0\nqgo4ATg9yXzgNcAH2hgvTXJjq/2iJLNa+zeSfCjJEuBPBtX110k+kcQtIZIkacrZWAHoMODyqvoO\n8FCSPVv7bvSC3EuANwE7VNXe9FYpT2p9zgA+VFV7AUe0cwN2Ag6qqsGru+8BVlfVvKraFfh6az+l\nquYDuwKv6tsysAewvAVLgCeBvwX+sn/QJFsC7273fCmwBPgz4FZ6IRhgf+A2YC/gZcCNw01KVa0A\nvgpcBZxUVb9opz4F/Fmr/a72PAOmVdX8qjq9r64PAZsBb6uqJ4e6V5LjW4BfsubR1cOVJEmS1Ekb\nK8QuBC5sxxfy1JaCm6vq/qp6HPgecEVrXwnMaccHAR9Osgy4BNisrewCXFJVjw1xv4OAjwy8qKqH\n2+FRSW6hFzp3pheCAQ4BvjJojM8A+yTZvq9tn3bN9a2eY4HtquoJ4HtJXgLsDfw98Ep6gfa6YWel\n5yPAfVV1NUCS5wMzq+r6dv78NtaAz/3q5ZwGzKiqE/tC+NNU1dkt/M6f9uxZI5QkSZLULWO+zzLJ\nFsCrgXlJCpgGFHAZ8Hhf1yf7Xj/ZV8smwD5V9fNB4wI8sg51bA+8E9irqh5Och4ws51+Lb1V3v9U\nVU8k+TvgL/qHAa4cYuUX4Fp6+2p/CXwNOI/es/75CKU92X5Ga/Az3wTslWTzvrAuSZI0pWyMldgj\ngQuqaruqmlNV2wL30FulHI0reGprAUl2X0vfAVcCJ/Zdszm9t9sfAVYn2Zpe4KTtN51eVQ8NMc55\n9FZ1t2qvbwD2TfJb7d
rnJNmhnbsOeAfwrap6AHg+8Nv0thaMWqvjsSSvaE1vAq5ZyyWXAX8HXNq3\nQi1JkjSlbIwQuxD40qC2ixj9txScDMxvH3K6g94e2pG8D9i8fTXWcuDAqlpObxvBnfS2Cgy8XX8w\nvZXTp2l7VM8EfqO9fgA4DvhskhXAt4CBrwy7Edia3ooswApg5dre4l+LNwEfavfYqT3PsKrqQnqB\n++IkM9fWV5IkaTLK+mWu7kpyDnBOVd0w3rU8U2bMnluzjz195I6SJElDWLV4wTN2ryRL2wfz12rK\nffdoVb1tvGuQJEnShplyIfaZkOQjwL6Dms+oqnPHox5JkqTJxhC7EVTViSP3kiRJ0voyxE4B87aZ\nxZJncC+LJEnSxuafLJUkSVLnGGIlSZLUOYZYSZIkdY4hVpIkSZ1jiJUkSVLnGGIlSZLUOYZYSZIk\ndU6qarxr0EaW5KfAXeNdxySzJfDgeBcxiTifY885HXvO6dhzTsfeZJjT7apqq5E6+ccOpoa7qmr+\neBcxmSRZ4pyOHedz7DmnY885HXvO6dibSnPqdgJJkiR1jiFWkiRJnWOInRrOHu8CJiHndGw5n2PP\nOR17zunYc07H3pSZUz/YJUmSpM5xJVaSJEmdY4idxJIckuSuJP+aZNF419MVSbZN8i9J7khye5I/\nbe1bJLkyyXfb7837rnlXm+e7krxu/KqfuJJMS3Jrkkvba+dzAyV5XpIvJLkzybeTvNx5XX9J/nv7\nd/62JJ9NMtP5XHdJPpnkh0lu62tb53lMsmeSle3cmUnyTD/LRDHMnH6g/bu/IsmXkjyv79yUmFND\n7CSVZBrwEeBQYCdgYZKdxreqzngC+H+raidgH+DENneLgKuqai5wVXtNO3c0sDNwCPDRNv/6VX8K\nfLvvtfO54c4ALq+qHYHd6M2v87oekmwDnAzMr6pdgGn05sv5XHfn0ZuTfuszj2cBbwfmtp/BY04l\n5/H0578S2KWqdgW+A7wLptacGmInr72Bf62qu6vqF8CFwGHjXFMnVNX9VXVLO/4pvWCwDb35O791\nOx84vB0fBlxYVY9X1T3Av9KbfzVJXgAsAM7pa3Y+N0CSWcArgU8AVNUvqurHOK8bYjqwaZLpwLOB\n/4vzuc6q6lrgR4Oa12kek8wGNquqG6r34Z1/7LtmyhlqTqvqiqp6or28AXhBO54yc2qInby2AX7Q\n9/re1qZ1kGQOsAdwI7B1Vd3fTv07sHU7dq5HdjrwP4An+9qczw2zPfAAcG7bpnFOkufgvK6XqroP\n+CDwb8D9wOqqugLnc6ys6zxu044Ht2tofwh8pR1PmTk1xErDSPJc4CLgHVX1k/5z7b9i/WqPUUjy\nu8APq2rpcH2cz/UyHXgpcFZV7QE8QnuLdoDzOnptj+Zh9P7j4DeB5yR5Y38f53NsOI9jK8kp9LbB\nfXq8a3mmGWInr/uAbftev6C1aRSS/Bq9APvpqvpia/6P9nYM7fcPW7tzvXb7Ar+XZBW9bS2vTvIp\nnM8NdS9wb1Xd2F5/gV6odV7Xz0HAPVX1QFX9Evgi8Aqcz7GyrvN4H0+9Pd7frj5JjgN+FzimnvrO\n1Ckzp4bYyetmYG6S7ZM8i94m70vGuaZOaJ/W/ATw7ar6+75TlwDHtuNjgYv72o9OMiPJ9vQ2y9/0\nTNU70VXVu6rqBVU1h97/Dr9eVW/E+dwgVfXvwA+S/HZreg1wB87r+vo3YJ8kz27/H/Aaevvhnc+x\nsU7z2LYe/CTJPu2fx5v7rhG9byCit03r96rq0b5TU2ZOp493Ado4quqJJH8CfJXep2w/WVW3j3NZ\nXbEv8CZgZZJlre0vgcXAPyV5K/B94CiAqro9yT/RCxBPACdW1ZpnvuzOcT433EnAp9t/qN4NvIXe\n4oTzuo6q6sYkXwBuoTc/t9L7y0fPxflcJ0k+CxwAbJnkXuC9rN+/7/+N3qfyN6W33/Mr
TFHDzOm7\ngBnAle2bsm6oqhOm0pz6F7skSZLUOW4nkCRJUucYYiVJktQ5hlhJkiR1jiFWkiRJnWOIlSRJUucY\nYiVJktQ5hlhJkiR1jiFWkiRJnfP/Az4poM/fVfCHAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11d57ec50>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(type(clean_tz))\n",
    "print(clean_tz[:10])\n",
    "tz_counts = clean_tz.value_counts()\n",
    "print('tz_count', type(tz_counts))\n",
    "print(tz_counts[:10])\n",
    "tz_counts[:10].plot(kind='barh', rot=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:52:43.195024Z",
     "start_time": "2018-12-25T22:52:43.152336Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0                  Mozilla/5.0\n",
      "1       GoogleMaps/RochesterNY\n",
      "2                  Mozilla/4.0\n",
      "3                  Mozilla/5.0\n",
      "4                  Mozilla/5.0\n",
      "5                  Mozilla/5.0\n",
      "6                  Mozilla/5.0\n",
      "7                  Mozilla/5.0\n",
      "8                   Opera/9.80\n",
      "9                  Mozilla/5.0\n",
      "10                 Mozilla/5.0\n",
      "11                 Mozilla/5.0\n",
      "12                 Mozilla/5.0\n",
      "13                 Mozilla/5.0\n",
      "14                 Mozilla/5.0\n",
      "15                 Mozilla/5.0\n",
      "16                 Mozilla/5.0\n",
      "17      GoogleMaps/RochesterNY\n",
      "18                 Mozilla/5.0\n",
      "19                 Mozilla/5.0\n",
      "20                 Mozilla/5.0\n",
      "21                 Mozilla/4.0\n",
      "22                 Mozilla/5.0\n",
      "23                 Mozilla/5.0\n",
      "24                 Mozilla/5.0\n",
      "25                 Mozilla/5.0\n",
      "26                 Mozilla/5.0\n",
      "27                 Mozilla/5.0\n",
      "28                 Mozilla/5.0\n",
      "29                 Mozilla/5.0\n",
      "                 ...          \n",
      "3410               Mozilla/5.0\n",
      "3411               Mozilla/5.0\n",
      "3412               Mozilla/5.0\n",
      "3413               Mozilla/5.0\n",
      "3414               Mozilla/5.0\n",
      "3415               Mozilla/5.0\n",
      "3416               Mozilla/5.0\n",
      "3417               Mozilla/5.0\n",
      "3418               Mozilla/5.0\n",
      "3419               Mozilla/5.0\n",
      "3420               Mozilla/5.0\n",
      "3421               Mozilla/5.0\n",
      "3422               Mozilla/5.0\n",
      "3423               Mozilla/5.0\n",
      "3424               Mozilla/5.0\n",
      "3425               Mozilla/5.0\n",
      "3426               Mozilla/5.0\n",
      "3427               Mozilla/5.0\n",
      "3428               Mozilla/5.0\n",
      "3429               Mozilla/5.0\n",
      "3430               Mozilla/4.0\n",
      "3431               Mozilla/5.0\n",
      "3432               Mozilla/5.0\n",
      "3433               Mozilla/4.0\n",
      "3434               Mozilla/4.0\n",
      "3435               Mozilla/4.0\n",
      "3436               Mozilla/5.0\n",
      "3437    GoogleMaps/RochesterNY\n",
      "3438            GoogleProducer\n",
      "3439               Mozilla/4.0\n",
      "Name: ala, Length: 3440, dtype: object\n"
     ]
    }
   ],
   "source": [
    "result = Series([x.split()[0] for x in frame.a.dropna()], name='ala')\n",
    "result[:5]\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T22:58:04.680991Z",
     "start_time": "2018-12-25T22:58:04.647168Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Mozilla/5.0               2594\n",
       "Mozilla/4.0                601\n",
       "GoogleMaps/RochesterNY     121\n",
       "Opera/9.80                  34\n",
       "TEST_INTERNET_AGENT         24\n",
       "Name: ala, dtype: int64"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.value_counts()[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:01:02.171156Z",
     "start_time": "2018-12-25T23:01:02.123689Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 3440 entries, 0 to 3559\n",
      "Data columns (total 18 columns):\n",
      "_heartbeat_    0 non-null float64\n",
      "a              3440 non-null object\n",
      "al             3094 non-null object\n",
      "c              2919 non-null object\n",
      "cy             2919 non-null object\n",
      "g              3440 non-null object\n",
      "gr             2919 non-null object\n",
      "h              3440 non-null object\n",
      "hc             3440 non-null float64\n",
      "hh             3440 non-null object\n",
      "kw             93 non-null object\n",
      "l              3440 non-null object\n",
      "ll             2919 non-null object\n",
      "nk             3440 non-null float64\n",
      "r              3440 non-null object\n",
      "t              3440 non-null float64\n",
      "tz             3440 non-null object\n",
      "u              3440 non-null object\n",
      "dtypes: float64(4), object(14)\n",
      "memory usage: 510.6+ KB\n"
     ]
    }
   ],
   "source": [
    "cframe = frame[frame.a.notnull()]\n",
    "cframe.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:02:26.635844Z",
     "start_time": "2018-12-25T23:02:26.604770Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<type 'numpy.ndarray'>\n",
      "['Windows' 'Not Windows' 'Windows' ..., 'Not Windows' 'Not Windows'\n",
      " 'Windows']\n",
      "(3440,)\n"
     ]
    }
   ],
   "source": [
    "operation_system = np.where(cframe.a.str.contains('Windows'), 'Windows', 'Not Windows')\n",
    "print(type(operation_system))\n",
    "print(operation_system)\n",
    "print(operation_system.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:03:30.905626Z",
     "start_time": "2018-12-25T23:03:30.870581Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Windows', 'Not Windows', 'Windows', 'Not Windows', 'Windows'],\n",
       "      dtype='|S11')"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "operation_system[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:04:23.117980Z",
     "start_time": "2018-12-25T23:04:23.089795Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<pandas.core.groupby.DataFrameGroupBy object at 0x10df5ce50>\n",
      "<class 'pandas.core.groupby.DataFrameGroupBy'>\n"
     ]
    }
   ],
   "source": [
    "by_tz_os = cframe.groupby(['tz', operation_system])\n",
    "print(by_tz_os)\n",
    "print(type(by_tz_os))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:14:32.586784Z",
     "start_time": "2018-12-25T23:14:32.539377Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<type 'tuple'>\n",
      "(u'', 'Not Windows')\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 245 entries, 8 to 3536\n",
      "Data columns (total 18 columns):\n",
      "_heartbeat_    0 non-null float64\n",
      "a              245 non-null object\n",
      "al             239 non-null object\n",
      "c              0 non-null object\n",
      "cy             0 non-null object\n",
      "g              245 non-null object\n",
      "gr             0 non-null object\n",
      "h              245 non-null object\n",
      "hc             245 non-null float64\n",
      "hh             245 non-null object\n",
      "kw             4 non-null object\n",
      "l              245 non-null object\n",
      "ll             0 non-null object\n",
      "nk             245 non-null float64\n",
      "r              245 non-null object\n",
      "t              245 non-null float64\n",
      "tz             245 non-null object\n",
      "u              245 non-null object\n",
      "dtypes: float64(4), object(14)\n",
      "memory usage: 36.4+ KB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "for name,group in by_tz_os:\n",
    "    print(type(name))\n",
    "    print(name)\n",
    "    print(group.info())\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:15:21.632968Z",
     "start_time": "2018-12-25T23:15:21.600107Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tz                                         \n",
      "                                Not Windows    245\n",
      "                                Windows        276\n",
      "Africa/Cairo                    Windows          3\n",
      "Africa/Casablanca               Windows          1\n",
      "Africa/Ceuta                    Windows          2\n",
      "Africa/Johannesburg             Windows          1\n",
      "Africa/Lusaka                   Windows          1\n",
      "America/Anchorage               Not Windows      4\n",
      "                                Windows          1\n",
      "America/Argentina/Buenos_Aires  Not Windows      1\n",
      "America/Argentina/Cordoba       Windows          1\n",
      "America/Argentina/Mendoza       Windows          1\n",
      "America/Bogota                  Not Windows      1\n",
      "                                Windows          2\n",
      "America/Caracas                 Windows          1\n",
      "America/Chicago                 Not Windows    115\n",
      "                                Windows        285\n",
      "America/Chihuahua               Not Windows      1\n",
      "                                Windows          1\n",
      "America/Costa_Rica              Windows          1\n",
      "America/Denver                  Not Windows    132\n",
      "                                Windows         59\n",
      "America/Edmonton                Not Windows      2\n",
      "                                Windows          4\n",
      "America/Guayaquil               Not Windows      2\n",
      "America/Halifax                 Not Windows      1\n",
      "                                Windows          3\n",
      "America/Indianapolis            Not Windows      8\n",
      "                                Windows         12\n",
      "America/La_Paz                  Windows          1\n",
      "                                              ... \n",
      "Europe/Madrid                   Not Windows     16\n",
      "                                Windows         19\n",
      "Europe/Malta                    Windows          2\n",
      "Europe/Moscow                   Not Windows      1\n",
      "                                Windows          9\n",
      "Europe/Oslo                     Not Windows      2\n",
      "                                Windows          8\n",
      "Europe/Paris                    Not Windows      4\n",
      "                                Windows         10\n",
      "Europe/Prague                   Not Windows      3\n",
      "                                Windows          7\n",
      "Europe/Riga                     Not Windows      1\n",
      "                                Windows          1\n",
      "Europe/Rome                     Not Windows      8\n",
      "                                Windows         19\n",
      "Europe/Skopje                   Windows          1\n",
      "Europe/Sofia                    Windows          1\n",
      "Europe/Stockholm                Not Windows      2\n",
      "                                Windows         12\n",
      "Europe/Uzhgorod                 Windows          1\n",
      "Europe/Vienna                   Not Windows      3\n",
      "                                Windows          3\n",
      "Europe/Vilnius                  Windows          2\n",
      "Europe/Volgograd                Windows          1\n",
      "Europe/Warsaw                   Not Windows      1\n",
      "                                Windows         15\n",
      "Europe/Zurich                   Not Windows      4\n",
      "Pacific/Auckland                Not Windows      3\n",
      "                                Windows          8\n",
      "Pacific/Honolulu                Windows         36\n",
      "Length: 149, dtype: int64\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(by_tz_os.size())\n",
    "print(type(by_tz_os.size()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:20:22.219737Z",
     "start_time": "2018-12-25T23:20:22.165194Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 97 entries,  to Pacific/Honolulu\n",
      "Data columns (total 2 columns):\n",
      "Not Windows    97 non-null float64\n",
      "Windows        97 non-null float64\n",
      "dtypes: float64(2)\n",
      "memory usage: 2.3+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Not Windows</th>\n",
       "      <th>Windows</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tz</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <td>245.0</td>\n",
       "      <td>276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Africa/Cairo</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Africa/Casablanca</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Africa/Ceuta</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Africa/Johannesburg</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Not Windows  Windows\n",
       "tz                                       \n",
       "                           245.0    276.0\n",
       "Africa/Cairo                 0.0      3.0\n",
       "Africa/Casablanca            0.0      1.0\n",
       "Africa/Ceuta                 0.0      2.0\n",
       "Africa/Johannesburg          0.0      1.0"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_counts = by_tz_os.size().unstack().fillna(0)\n",
    "agg_counts.info()\n",
    "agg_counts.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:19:50.088864Z",
     "start_time": "2018-12-25T23:19:50.039549Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Series.unstack?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:23:02.153152Z",
     "start_time": "2018-12-25T23:23:02.117892Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Series.argsort?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:25:13.588197Z",
     "start_time": "2018-12-25T23:25:13.550552Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tz\n",
       "                                   521.0\n",
       "Africa/Cairo                         3.0\n",
       "Africa/Casablanca                    1.0\n",
       "Africa/Ceuta                         2.0\n",
       "Africa/Johannesburg                  1.0\n",
       "Africa/Lusaka                        1.0\n",
       "America/Anchorage                    5.0\n",
       "America/Argentina/Buenos_Aires       1.0\n",
       "America/Argentina/Cordoba            1.0\n",
       "America/Argentina/Mendoza            1.0\n",
       "America/Bogota                       3.0\n",
       "America/Caracas                      1.0\n",
       "America/Chicago                    400.0\n",
       "America/Chihuahua                    2.0\n",
       "America/Costa_Rica                   1.0\n",
       "America/Denver                     191.0\n",
       "America/Edmonton                     6.0\n",
       "America/Guayaquil                    2.0\n",
       "America/Halifax                      4.0\n",
       "America/Indianapolis                20.0\n",
       "America/La_Paz                       1.0\n",
       "America/Lima                         1.0\n",
       "America/Los_Angeles                382.0\n",
       "America/Managua                      3.0\n",
       "America/Mazatlan                     1.0\n",
       "America/Mexico_City                 15.0\n",
       "America/Monterrey                    1.0\n",
       "America/Montevideo                   1.0\n",
       "America/Montreal                     9.0\n",
       "America/New_York                  1251.0\n",
       "                                   ...  \n",
       "Europe/Berlin                       28.0\n",
       "Europe/Bratislava                    3.0\n",
       "Europe/Brussels                      4.0\n",
       "Europe/Bucharest                     4.0\n",
       "Europe/Budapest                      5.0\n",
       "Europe/Copenhagen                    5.0\n",
       "Europe/Dublin                        3.0\n",
       "Europe/Helsinki                     10.0\n",
       "Europe/Lisbon                        8.0\n",
       "Europe/Ljubljana                     1.0\n",
       "Europe/London                       74.0\n",
       "Europe/Madrid                       35.0\n",
       "Europe/Malta                         2.0\n",
       "Europe/Moscow                       10.0\n",
       "Europe/Oslo                         10.0\n",
       "Europe/Paris                        14.0\n",
       "Europe/Prague                       10.0\n",
       "Europe/Riga                          2.0\n",
       "Europe/Rome                         27.0\n",
       "Europe/Skopje                        1.0\n",
       "Europe/Sofia                         1.0\n",
       "Europe/Stockholm                    14.0\n",
       "Europe/Uzhgorod                      1.0\n",
       "Europe/Vienna                        6.0\n",
       "Europe/Vilnius                       2.0\n",
       "Europe/Volgograd                     1.0\n",
       "Europe/Warsaw                       16.0\n",
       "Europe/Zurich                        4.0\n",
       "Pacific/Auckland                    11.0\n",
       "Pacific/Honolulu                    36.0\n",
       "Length: 97, dtype: float64"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_counts.sum(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:36:37.743750Z",
     "start_time": "2018-12-25T23:36:37.702143Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tz\n",
       "                                  24\n",
       "Africa/Cairo                      20\n",
       "Africa/Casablanca                 21\n",
       "Africa/Ceuta                      92\n",
       "Africa/Johannesburg               87\n",
       "Africa/Lusaka                     53\n",
       "America/Anchorage                 54\n",
       "America/Argentina/Buenos_Aires    57\n",
       "America/Argentina/Cordoba         26\n",
       "America/Argentina/Mendoza         55\n",
       "America/Bogota                    62\n",
       "America/Caracas                   34\n",
       "America/Chicago                   60\n",
       "America/Chihuahua                 36\n",
       "America/Costa_Rica                37\n",
       "America/Denver                    27\n",
       "America/Edmonton                  76\n",
       "America/Guayaquil                 56\n",
       "America/Halifax                   89\n",
       "America/Indianapolis               2\n",
       "America/La_Paz                     4\n",
       "America/Lima                       5\n",
       "America/Los_Angeles                7\n",
       "America/Managua                    8\n",
       "America/Mazatlan                   9\n",
       "America/Mexico_City               86\n",
       "America/Monterrey                 11\n",
       "America/Montevideo                14\n",
       "America/Montreal                  52\n",
       "America/New_York                  84\n",
       "                                  ..\n",
       "Europe/Berlin                     28\n",
       "Europe/Bratislava                 43\n",
       "Europe/Brussels                   31\n",
       "Europe/Bucharest                  46\n",
       "Europe/Budapest                   74\n",
       "Europe/Copenhagen                 83\n",
       "Europe/Dublin                     81\n",
       "Europe/Helsinki                   80\n",
       "Europe/Lisbon                     95\n",
       "Europe/Ljubljana                  38\n",
       "Europe/London                     88\n",
       "Europe/Madrid                     82\n",
       "Europe/Malta                      25\n",
       "Europe/Moscow                     93\n",
       "Europe/Oslo                       30\n",
       "Europe/Paris                      19\n",
       "Europe/Prague                     64\n",
       "Europe/Riga                       32\n",
       "Europe/Rome                       85\n",
       "Europe/Skopje                     67\n",
       "Europe/Sofia                      35\n",
       "Europe/Stockholm                  78\n",
       "Europe/Uzhgorod                   96\n",
       "Europe/Vienna                     59\n",
       "Europe/Vilnius                    77\n",
       "Europe/Volgograd                  15\n",
       "Europe/Warsaw                     22\n",
       "Europe/Zurich                     12\n",
       "Pacific/Auckland                   0\n",
       "Pacific/Honolulu                  29\n",
       "Length: 97, dtype: int64"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# argsort() yields the integer positions that would order the row totals\n",
    "# ascending; the result keeps the original tz labels as its index.\n",
    "indexer = agg_counts.sum(axis='columns').argsort()\n",
    "indexer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:38:26.510804Z",
     "start_time": "2018-12-25T23:38:26.469034Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Not Windows</th>\n",
       "      <th>Windows</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tz</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>America/New_York</th>\n",
       "      <td>339.0</td>\n",
       "      <td>912.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <td>245.0</td>\n",
       "      <td>276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>America/Chicago</th>\n",
       "      <td>115.0</td>\n",
       "      <td>285.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Not Windows  Windows\n",
       "tz                                    \n",
       "America/New_York        339.0    912.0\n",
       "                        245.0    276.0\n",
       "America/Chicago         115.0    285.0"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check three rows by integer position (not by label).\n",
    "agg_counts.iloc[[29, 0, 12]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:43:28.440060Z",
     "start_time": "2018-12-25T23:43:28.397174Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Not Windows</th>\n",
       "      <th>Windows</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tz</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>America/Sao_Paulo</th>\n",
       "      <td>13.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Europe/Madrid</th>\n",
       "      <td>16.0</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pacific/Honolulu</th>\n",
       "      <td>0.0</td>\n",
       "      <td>36.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Asia/Tokyo</th>\n",
       "      <td>2.0</td>\n",
       "      <td>35.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Europe/London</th>\n",
       "      <td>43.0</td>\n",
       "      <td>31.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>America/Denver</th>\n",
       "      <td>132.0</td>\n",
       "      <td>59.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>America/Los_Angeles</th>\n",
       "      <td>130.0</td>\n",
       "      <td>252.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>America/Chicago</th>\n",
       "      <td>115.0</td>\n",
       "      <td>285.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <td>245.0</td>\n",
       "      <td>276.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>America/New_York</th>\n",
       "      <td>339.0</td>\n",
       "      <td>912.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Not Windows  Windows\n",
       "tz                                       \n",
       "America/Sao_Paulo           13.0     20.0\n",
       "Europe/Madrid               16.0     19.0\n",
       "Pacific/Honolulu             0.0     36.0\n",
       "Asia/Tokyo                   2.0     35.0\n",
       "Europe/London               43.0     31.0\n",
       "America/Denver             132.0     59.0\n",
       "America/Los_Angeles        130.0    252.0\n",
       "America/Chicago            115.0    285.0\n",
       "                           245.0    276.0\n",
       "America/New_York           339.0    912.0"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reorder rows by ascending total, then keep the last ten (the largest).\n",
    "count_subset = agg_counts.take(indexer).iloc[-10:]\n",
    "count_subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:43:29.460298Z",
     "start_time": "2018-12-25T23:43:29.421759Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tz\n",
       "America/Sao_Paulo        33.0\n",
       "Europe/Madrid            35.0\n",
       "Pacific/Honolulu         36.0\n",
       "Asia/Tokyo               37.0\n",
       "Europe/London            74.0\n",
       "America/Denver          191.0\n",
       "America/Los_Angeles     382.0\n",
       "America/Chicago         400.0\n",
       "                        521.0\n",
       "America/New_York       1251.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-time-zone totals for the ten-row subset.\n",
    "count_subset.sum(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:53:54.171181Z",
     "start_time": "2018-12-25T23:53:53.831187Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x11dc0c250>"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAr8AAAFpCAYAAACVlkBBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XucXWV97/HPl0QTFBIIUE4UMOgJ5ZJgAsMtIchNheKp\nKIggKFgp1YMgXlqxtiX0WKUHKuBdwBMsolAFkcIRwQsQCAgJCQk3L0AElFMhQhQE5PI7f+w1sBlm\nkgmZMJf9eb9e85q1n/WsZ/3WXrz0m2eevXaqCkmSJKkTrDXYBUiSJEkvFcOvJEmSOobhV5IkSR3D\n8CtJkqSOYfiVJElSxzD8SpIkqWMYfiVJktQxDL+SJEnqGIZfSZIkdQzDryRJkjrG6MEuQEPXhhtu\nWJMmTRrsMiRJklZqwYIFD1bVRivrZ/hVnyZNmsT8+fMHuwxJkqSVSvKr/vRz2YMkSZI6huFXkiRJ\nHcPwK0mSpI5h+JUkSVLHMPxKkiSpYxh+JUmS1DF81Jn69puFMHv8YFchSZKGq9nLB7uCF3DmV5Ik\nSR3D8CtJkqSOYfiVJElSxzD8SpIkqWMMufCbZP8klWTLNTR+V5LPrcbxByf5ZJIjkjyTZNu2fbck\nmTQQdbaN+ddJzm97PS7JnUleuwpjfCPJ/gNZlyRJ0nA05MIvcAhwTfN7QCUZXVXzq+rY1RhmX+Cy\nZvs+4JOrX9kKnQVsmmTv5vU/A/+nqu7qz8FJfKKHJElSY0iF3yTrALsC7wMObtp2T3JVku8luSvJ\nSUkOTXJDkiVJXtf02yjJBUlubH5mNu2zk5yT5FrgnGa8S7rPl2ROM87iJAc07V9OMj/JrUlObKsv\nwDTgpqbpEmCbJH/ey7W8Kcl1SW5K8u3mXDskubDZ/9YkjyV5eZKxSXoNs1VVwPuB05J0AXsBJzdj\nbJfkp03tFyQZ37Rfk+TUJPOBD/ao6zNJvpZkSN17SZKkl8JQmxV8K3BZVf08ybIk2zftrwe2An4H\n3AWcVVU7JvkQcAxwHHA6cGpVXZNkM+AHzTEAWwO7VtVjSXZvO98/AsuraipAkvWb9k9W1e+SjAJ+\nlGTbqloMTAdurqpq5WCeAf438PfA4d2DJtkQ+Adg76p6NMnHgY8An6YVngFmAbcAO9C6Dz/t602p\nqsVJfgD8CHhrVf2p2fUN4K+r6tokn26u52PNvlFV1dXU843m96nAy4Ejm1D9AkmOAo4CGDVuIyY9\nPqevsqSOs/Sk/Qa7BEnSahpqs3+HAOc12+fx3NKHG6vq/qp6ArgTuLxpXwJMarb3Br6QZBFwMTCu\nmUkGuLiqHuvlfHsDX+x+UVUPNZsHJbkJWAhsQys8A+wDfL/HGN8Edk6yeVvbzs0x1zb1HA68pqqe\nAu5MshWwI/BZYDdaQXhun+9KyxeBX1fVlQBJNgDGVtW1zf6vN2N1O//5h3MiMKaqju4r+AJU1RlV\n1VVVXaNe4RdcSJKkkWXIzPwmmQDsCUxNUsAooIBLgSfauj7T9voZnruGtYCdq+rxHuMCPLoKdWxO\na/Z0h6p6KMnZwNhm95uAA9r7V9VTSf4N+Hj7MMAVVdXbuuWraa0bfhL4IXA2rWv925WU9kzz0189\nr/kGYIck67eFfEmSpI4ylGZ+DwTOqarXVNWkqtoUuJvWrGh/XE5rCQQASaatoG+3K4Cj245ZHxhH\nKzguT7IxraBKs552dFUt62Wcs2nNIm/UvL4emJnkvzfHvjLJFs2+ubSWaVxXVQ8AGwB/TmsJRL81\ndTyWZEbT9G7gqhUccinwb8AlbTPikiRJHWUohd9DgO/2aLuA/j/14Vigq/nw1220PiS2Mp8C1m8e\nUXYzsEdV3UxrucMdtJY0dC8reCOtmdoXaNbgfg74s+b1A8ARwLeSLAauA7of3fZTYGNaM8AAi4El\nK1qKsALvBk5tzrF1cz19qqrzaAX17y
UZu6K+kiRJI1FeXObqPEnOovVBu+sHu5aXypiJk2vi4acN\ndhnSkOEH3iRp6EqyoPvD/isyZNb8DnVVdeRg1yBJkqTVY/gdQpJ8EZjZo/n0qvJ5Y5IkSQPAZQ/q\nU1dXV82fP3+wy5AkSVqp/i57GEofeJMkSZLWKMOvJEmSOobhV5IkSR3D8CtJkqSOYfiVJElSxzD8\nSpIkqWMYfiVJktQxDL+SJEnqGIZfSZIkdQzDryRJkjqG4VeSJEkdw/ArSZKkjmH4lSRJUscw/EqS\nJKljGH4lSZLUMUYPdgEawn6zEGaPH+wq1KlmLx/sCiRJI5Azv5IkSeoYhl9JkiR1DMOvJEmSOobh\nV5IkSR3D8CtJkqSOYfiVJElSxzD8SpIkqWMYfiVJktQx/JILPU+So4CjAEaN24hJj88Z5Io03Cw9\nab/BLkGSpD4586vnqaozqqqrqrpGvcJvd5MkSSOL4VeSJEkdw/ArSZKkjmH4lSRJUscw/EqSJKlj\nGH4lSZLUMQy/kiRJ6hg+51d9mvrq8cz3ma2SJGkEceZXkiRJHcPwK0mSpI5h+JUkSVLHMPxKkiSp\nYxh+JUmS1DEMv5IkSeoYhl9JkiR1DMOvJEmSOobhV5IkSR3D8CtJkqSOYfiVJElSxzD8SpIkqWMY\nfiVJktQxDL+SJEnqGIZfSZIkdYzRg12AhrDfLITZ4we7Cun5Zi8f7AokScOYM7+SJEnqGIZfSZIk\ndQzDryRJkjrGsAy/SfZPUkm2XEPjdyX53Gocf3CSTzbb+yaZn+S2JAuT/FvTfnaSA3s59lVJvvPi\nq5ckSVJfhmX4BQ4Brml+D6gko6tqflUduxrD7AtclmQK8AXgsKraGugCfrmiA6vqN1X1glAsSZKk\n1Tfswm+SdYBdgfcBBzdtuye5Ksn3ktyV5KQkhya5IcmSJK9r+m2U5IIkNzY/M5v22UnOSXItcE4z\n3iXd50sypxlncZIDmvYvNzO6tyY5sa2+ANOAm4C/A/6lqu4AqKqnq+rLbZezW5J5Tc0HNsdPSnJL\nsz0qySlJbmnOfUzT/k9N/bckOaM5J0l2aPotSnJy2zhj265hYZI91sjNkSRJGuKGXfgF3gpcVlU/\nB5Yl2b5pfz3wfmAr4N3AFlW1I3AWcEzT53Tg1KraATig2ddta2Dvquo5m/yPwPKqmlpV2wI/bto/\nWVVdwLbAG5Js27RPB26uqgKmAAtWcC0TaQX5twAn9bL/KGASMK0597lN+xeqaoeqmgKs3RwPMAf4\nm6qaBjzdNs7RQFXVVFqz5V9PMnYFdUmSJI1Iw/E5v4fQCrEA5zWvLwFurKr7AZLcCVze9FkCdM90\n7g1s3UyUAoxrZpIBLq6qx3o53940M8wAVfVQs3lQkqNovYcTaYXnxcA+wPf7eS0XVdUzwG1JNu7j\n3F+pqqeac/+uad8jyd8BrwAmALcmmQusW1XXNX2+yXOheFfg880YdyT5FbBFU+/zNNd0FMCocRsx\n6fE5/byUkW3pSfsNdgmSJGkADKvwm2QCsCcwNUkBo4ACLgWeaOv6TNvrZ3juOtcCdq6qx3uMC/Do\nKtSxOfAxYIeqeijJ2UD3TOqbaM0qA9wKbA/c3MdQ7TWnjz49zz0W+BLQVVX3Jpnddu7VVlVnAGcA\njJk4uQZqXEmSpKFguC17OBA4p6peU1WTqmpT4G5gVj+Pv5znlkCQZFo/jrmC1rKB7mPWB8bRCsvL\nmxnbfZt944HRVbWs6X4y8PdJtmj2r5Xk/f2stfvcf5NkdHP8BJ4Lug82s9YHAlTVw8AfkuzU7D+4\nbZy5wKHNGFsAmwE/W4U6JEmSRoThFn4PAb7bo+0C+v/Uh2OBruZDYbfRWiO8Mp8C1m8+XHYzsEdV\n3QwsBO6gtbzg2qbvG4Efdh9YVYuB44BvJbkduAV4bT9rhdaa5HuAxc2539WE3DObsX4A3NjW/33A\nmU
kWAa8Eur8H9kvAWkmWAOcDR1RV+6yzJElSR0jrc1kaCEnOAs6qqusH6fzrVNUjzfbxwMSq+tCL\nHW/MxMk18fDTBqy+4cw1v5IkDW1JFjQPI1ihYbXmd6irqiMHuYT9knyC1n39FXDE4JYjSZI0tBh+\nR5CqOp/WsgZJkiT1Yrit+ZUkSZJeNGd+1aeprx7PfNe6SpKkEcSZX0mSJHUMw68kSZI6huFXkiRJ\nHcPwK0mSpI5h+JUkSVLHMPxKkiSpYxh+JUmS1DEMv5IkSeoYhl9JkiR1DMOvJEmSOobhV5IkSR3D\n8CtJkqSOYfiVJElSxzD8SpIkqWMYfiVJktQxDL+SJEnqGKMHuwANYb9ZCLPHD3YVGu5mLx/sCiRJ\nepYzv5IkSeoYhl9JkiR1DMOvJEmSOobhV5IkSR1jjYbfJPsnqSRbrqHxu5J8bjWOPzjJJ5MckeQL\nA1lb2zlGJ3kgyUlrYvzmHEuTbLimxpckSRop1vTM7yHANc3vAZVkdFXNr6pjV2OYfYHLBqqmPrwR\n+DnwjiRZw+eSJEnSCqyx8JtkHWBX4H3AwU3b7kmuSvK9JHclOSnJoUluSLIkyeuafhsluSDJjc3P\nzKZ9dpJzklwLnNOMd0n3+ZLMacZZnOSApv3LSeYnuTXJiW31BZgG3LSCazikGe+WJP/atI1KcnbT\ntiTJh1fyVhwCnA7cA+zSNvbSJCcmuakZZ8u2a7+iqfesJL/qntVNcljzXi1K8tUko3qp+QV9XkTN\nkiRJI9KafM7vW4HLqurnSZYl2b5pfz2wFfA74C7grKraMcmHgGOA42iFxVOr6pokmwE/aI4B2BrY\ntaoeS7J72/n+EVheVVMBkqzftH+yqn7XBMUfJdm2qhYD04Gbq6p6m5BN8irgX4HtgYeAy5PsD9wL\nvLqqpjT91uvrDUgyFtgb+BtgPVpBeF5blwerarsk/xP4GHAkcALw46r6TJJ9aP3jgSRbAe8EZlbV\nk0m+BBwK/Hvb+frqc+sq1HwUcBTAqHEbMenxOX11VZulJ+032CVIkqR+WJPLHg4Bzmu2z+O5pQ83\nVtX9VfUEcCdwedO+BJjUbO8NfCHJIuBiYFwzkwxwcVU91sv59ga+2P2iqh5qNg9KchOwENiGVngG\n2Af4/grq3wG4sqoeqKqngHOB3WgF9tcm+XwTTn+/gjHeAvykqfcCYP8es7UXNr8XtF37rjTvW1Vd\nRit4A+xFK4jf2LwvewGv7XG+vvr0u+aqOqOquqqqa9Qr/IILSZI0sqyRmd8kE4A9galJChgFFHAp\n8ERb12faXj/TVs9awM5V9XiPcQEeXYU6Nqc1o7pDVT2U5GxgbLP7TcAB/b+qlmac1wNvBt4PHAT8\nVR/dDwF2TbK0eb0BrffliuZ197U/zcrvRYCvV9UnXkyfVahZkiRpxFpTM78HAudU1WuqalJVbQrc\nDczq5/GX01oCAUCSaf045grg6LZj1gfG0QrLy5NsTOsDbiQZD4yuqmUrGO8G4A1JNmxmaw8BrmrW\n365VVRcA/wBs19vBScbRut7NmvdgUlPfyj78dy2tcEqSNwHdyzd+BByY5M+afROSvKbHsb326W/N\nkiRJI92aCr+HAN/t0XYB/X/qw7FAV/PBtdtozVauzKeA9ZsPdd0M7FFVN9Na7nAH8E1awRJaT2D4\nYY/jj0hyX/cPrdnq44GfADcDC6rqe8CrgSubZQXfAPqaiX0brbW77TPd3wP+R5IxK7iOE4E3JbkF\neAfw/4A/VNVttILr5UkW0wr7E9sPXEGf/tYsSZI0oqWqBruGl1ySs2h90O76wa6lpyYYP11VTyXZ\nBfhyVfVn5nvAjZk4uSYeftpgnHrY8QNvkiQNriQLqqprZf3W5NMehqyqOnKwa1iBzYD/SLIW8Cfg\nrwe5HkmSpBGjI8PvQEvyRWBmj+bTq2qVnxNWVb+g9Rg2SZIkDbCO
XPag/unq6qr58+cPdhmSJEkr\n1d9lD2v6640lSZKkIcPwK0mSpI5h+JUkSVLHMPxKkiSpYxh+JUmS1DEMv5IkSeoYhl9JkiR1DMOv\nJEmSOobhV5IkSR3D8CtJkqSOYfiVJElSxzD8SpIkqWMYfiVJktQxDL+SJEnqGIZfSZIkdYzRg12A\nhrDfLITZ4we7Cpi9fLArkCRJI4Qzv5IkSeoYhl9JkiR1DMOvJEmSOsaICL9J9k9SSbZcQ+N3Jfnc\nahx/cJJPJjkiyQNJFib5RZIfJJkxkLVKkiSpbyMi/AKHANc0vwdUktFVNb+qjl2NYfYFLmu2z6+q\n6VU1GTgJuDDJVqtd6CpK4ocdJUlSxxn24TfJOsCuwPuAg5u23ZNcleR7Se5KclKSQ5PckGRJktc1\n/TZKckGSG5ufmU377CTnJLkWOKcZ75Lu8yWZ04yzOMkBTfuXk8xPcmuSE9vqCzANuKln7VX1E+AM\n4Kim7+uSXJZkQZK53TPZSc5O8rkk85rrObBpPy/Jfm3nOjvJgUlGJTm5uabFSf6m7X2Zm+Ri4LYB\nvRGSJEnDwEiY/XsrcFlV/TzJsiTbN+2vB7YCfgfcBZxVVTsm+RBwDHAccDpwalVdk2Qz4AfNMQBb\nA7tW1WNJdm873z8Cy6tqKkCS9Zv2T1bV75KMAn6UZNuqWgxMB26uqmrl4Be4CfibZvsM4P1V9Ysk\nOwFfAvZs9k2kFfK3BC4GvgOcDxwEXJrk5cBewAdo/UNgeVXtkGQMcG2Sy5txtgOmVNXd/Xp3JUmS\nRpCREH4PoRViAc5rXl8C3FhV9wMkuRPoDn9LgD2a7b2BrdtC6bhmJhng4qp6rJfz7U0zwwxQVQ81\nmwclOYrWezqRVnheDOwDfH8F9aepcR1gBvDttnrGtPW7qKqeAW5LsnHT9n3g9Cbg7gNc3YT1NwHb\nds8QA+OBycCfgBtWFHybazgKYNS4jZj0+JwVlP4SOf7Swa6gX5aetN/KO0mSpEE1rMNvkgm0Zkan\nJilgFFDApcATbV2faXv9DM9d91rAzlX1eI9xAR5dhTo2Bz4G7FBVDyU5Gxjb7H4TcMAKDp8O3N7U\n8nBVTeujX/v1BKCqHk9yJfBm4J20wn/3/mOq6gc96tydlVxXVZ1BawaaMRMn14r6SpIkDTfDfc3v\ngcA5VfWaqppUVZsCdwOz+nn85bSWQACQpK/g2e4K4Oi2Y9YHxtEKlcubWdl9m33jgdFVtay3gZK8\ngdYs65lV9Xvg7iTvaPYlyev7Uc/5wHtpXXP3h+p+AHwgycuasbZI8sp+jCVJkjSiDffwewjw3R5t\nF9D/pz4cC3Q1Hwq7DXh/P475FLB+kluS3AzsUVU3AwuBO4BvAtc2fd8I/LDH8e9MsijJz4G/Bw6o\nqtubfYcC72vGvZXWeuaVuRx4A/DDqvpT03YWrQ+03ZTkFuCrDPNZfkmSpIGQKv+yvaYkOYvWB+2u\nH+xaXowxEyfXxMNPG+wyhg3X/EqSNHiSLKiqrpX1czZwDaqqIwe7BkmSJD1nuC97kCRJkvrN8CtJ\nkqSOYfiVJElSx3DNr/o09dXjme+HuCRJ0gjizK8kSZI6huFXkiRJHcPwK0mSpI5h+JUkSVLHWGn4\nTfK1JNN6tM1eYxVJkiRJa0h/Zn7fDHw9yXva2v5yDdUjSZIkrTH9Cb+/BXYD3pHki0lGA1mzZUmS\nJEkDrz/hN1W1vKr+B/AAcCUwfo1WJUmSJK0B/Qm/V3RvVNVs4F+Bu9dUQZIkSdKa0p/wu3f7i6r6\nT2CjNVOOJEmStOb0+fXGST4A/E/gdUkWt+1aF7h2TRcmSZIkDbQ+wy/wTeD7wGeA49va/1BVv1uj\nVUmSJElrQJ/ht6qWA8uBQ166ciRJkqQ1x294kyRJUscw/EqSJKljrGjNrzrdbxbC7AF4pPPs5as/\nhiRJ0gBw5leSJEkdw/ArSZKk
jmH4lSRJUscYkWt+kzwNLGlrOq+qThrEeo4H7gUmA49U1SkDOPYk\n4JKqmjJQY0qSJI1UIzL8Ao9V1bQXc2CS0VX11ADX82bgIFrhV5IkSYOko5Y9JFmaZMNmuyvJlc32\n7CTnJLkWOCfJ2CRzkixJsjDJHk2/I5J8L8mVSX6R5IS2sQ9LckOSRUm+mmRU0z4OeHlVPbCCuj6S\n5Jbm57imbVKS25OcmeTWJJcnWbvZt32Sm5PcDBzdNs6K6r4wyWVN3f97YN9ZSZKk4WGkht+1mxDa\n/fPOfhyzNbB3VR1CK1BWVU2l9Q13X08ytum3I3AAsC3wjiZEbwW8E5jZzDg/DRza9N8b+FFfJ02y\nPfBeYCdgZ+Cvk0xvdk8GvlhV2wAPN+cFmAMcU1Wv7zHciuqe1tQ4FXhnkk378Z5IkiSNKC57eM7F\nVfVYs70r8HmAqrojya+ALZp9V1TVMoAkFzZ9nwK2B25MArA28Num/z60wmpfdgW+W1WPto05C7gY\nuLuqFjX9FgCTkqwHrFdVVzft5wD79qPuHzVfWU2S24DX0FqH/DxJjgKOAhg1biMmPb6i0vvp+Ev7\n3LX0pP1Wf3xJkqR+Gqnhty9P8dxs99ge+x7t5xjVy+sAX6+qT/TSf0fgA/2u8PmeaNt+mlaofrF6\njtXrva+qM4AzAMZMnNzzWiVJkoa1kbrsoS9Lac3QwnNLCHozl2bZQpItgM2AnzX73phkQrP+dn/g\nWlrLGg5M8mfNMROSvCbJNsAdVfX0Ss61f5JXJHkl8LamrVdV9TDwcJJdm6ZD23avqG5JkqSON1LD\nb881v92POTsROD3JfFqzn335ErBWkiXA+cARVdU9c3oDcAGwGLigquZX1W3APwCXJ1kMXAFMpLUc\n4bIeY/9Dkvu6f6rqJuDsZtyfAmdV1cKVXN97gS8mWURr1rk/dUuSJHW8VPmX7f5KcgTQVVUf7Gf/\nK4D3VNX9a7SwNWTMxMk18fDT1ug5XPMrSZIGQpIFVdW1sn6dtub3JVVVbxzsGiRJkvQcw+8qqKqz\naS1RkCRJ0jA0Utf8SpIkSS/gzK/6NPXV45nvmlxJkjSCOPMrSZKkjmH4lSRJUscw/EqSJKljGH4l\nSZLUMQy/kiRJ6hiGX0mSJHUMw68kSZI6huFXkiRJHcPwK0mSpI5h+JUkSVLHMPxKkiSpYxh+JUmS\n1DEMv5IkSeoYhl9JkiR1DMOvJEmSOsbowS5AQ9hvFsLs8QM33uzlAzeWJEnSi+DMryRJkjqG4VeS\nJEkdw/ArSZKkjmH4lSRJUscw/K6iJPsnqSRbrqTf/02yXj/GOz7JJ5Msan6ebts+dgXHfSPJ/i/m\nGiRJkjqVT3tYdYcA1zS/T+irU1X9RT/HezNwUFX9C0CSR6pq2mpXKUmSpBdw5ncVJFkH2BV4H3Bw\n0zYxydXNTO0tSWY17UuTbNhsX5RkQZJbkxzVNt444OVV9cAKzrl5kp8kWZzkiiSb9NLnM0m+luRN\nSb7T1r5vkm8324clWdLU+OmBeUckSZKGF8PvqnkrcFlV/RxYlmR74F3AD5rZ2tcDi3o57q+qanug\nCzg2yQZN+97Aj1Zyzi8BZ1XVtsC3gdPadyY5FRgHHAn8ENi2bfz3Av+nCcyfAvYApgMzk7xlFa5b\nkiRpRHDZw6o5BDi92T6veX0xrYD5MuCiquot/B6b5G3N9qbAZGAZsA8wZyXn3AnoDqr/Dvyvtn0n\nAtdW1f9sXleSc4F3Nb+3b2rcH/hxVT0IkOSbwG7AJT1P1sxMHwUwatxGMPu3KylPkiRp+DD89lOS\nCcCewNQkBYwCCvhbWkFyP+DsJJ+tqn9vO253WjO8u1TVH5NcCYxtdu8IfGA1yroB2CHJ+lX1UNP2\nf4ALmu3zq+rpJP0esKrOAM4AGDNxcq1GbZIkSUOOyx7670DgnKp6TVVNqqpNgbtpBd//qqozgb
OA\n7XocNx54qAm+WwI7AyTZBrijqp5eyXmvBw5qtg8Drm7bdynwb8AlzXpkqupe4EHgeODspt9PgT2S\nbJBkNK31ylet0tVLkiSNAM789t8hwL/2aLuAVsB8NMmTwCPAe3r0uQx4f5LbgZ/RCrMA+zb7VuZo\nWssqPgH8F611vM+qqvOSrAt8L8l+VfU48E1gXLM2maq6L8k/AlcCAf6zqi7tx7klSZJGlFT5l+3B\nkOQK4D1Vdf8aGPsrwHVV9fXVGWfMxMn1xP2/GKCqJEmS1pwkC6qqa2X9nPkdJFX1xjUxbpJFwENA\nn1+QIUmS1KkMvyOMX5AhSZLUNz/wJkmSpI5h+FWfpr56/GCXIEmSNKAMv5IkSeoYhl9JkiR1DMOv\nJEmSOobhV5IkSR3D8CtJkqSOYfiVJElSxzD8SpIkqWMYfiVJktQxDL+SJEnqGIZfSZIkdQzDryRJ\nkjqG4VeSJEkdw/ArSZKkjmH4lSRJUscw/EqSJKljjB7sAjSE/WYhzB4/sGPOXj6w40mSJK0CZ34l\nSZLUMQy/kiRJ6hiGX0mSJHWMYR9+kzydZFGSW5J8O8krXsQYf5nk+GZ7oyQ/TbIwyawk/zfJeis5\nfmKSy5NMSnJLj32zk3xsVWtayfl2T3JJP/o9MpDnlSRJGu6GffgFHquqaVU1BfgT8P5VHaCqLq6q\nk5qXewFLqmp6Vc2tqr+oqodXMsQ+wA9W9bySJEl6aY2E8NtuLvDfAZJclGRBkluTHNXdIck+SW5K\ncnOSHzVtRyT5QpJpwP8G3trMJq+dZGmSDZt+70myuDn2nLbz7gN8f2XFJZmW5PpmjO8mWb9pvzLJ\nvya5IcnPk8xq2scmmZNkSTMTvUcvYz5vZrmZAZ/Uo8/zZoqbaz1iZfVKkiSNNCPmUWdJRgP7Apc1\nTX9VVb9LsjZwY5ILaIX9M4HdquruJBPax6iqRUn+Ceiqqg8243aPvw3wD8CMqnqw+9gko4A/r6rb\nmtD5uiSL2ob9b8Apzfa/A8dU1VVJ/hk4ATiu2Te6qnZM8hdN+97A0a2yamqSLYHLk2wxAG+XJElS\nRxoJ4XfttrA5F/has31skrc125sCk4GNgKur6m6AqvrdKpxnT+DbVfVgj2N3An7a1u/OqprW/SLJ\n7Ob3eGC9qrqq2fV14Nttx13Y/F4ATGq2dwU+35zvjiS/AtZo+G1myY8C2GyzzWD2r9bk6SRJkl5S\nIyH8PtYeNqH1Z35aM6e7VNUfk1wJjF1D52+fbV4dTzS/n2bV7stTPH/5Sm/X2Z8+AFTVGcAZAF1d\nXbUKdUjtXguzAAAXfElEQVSSJA15I23Nb7fxwENN8N0S2Llpvx7YLcnmAD2XPazEj4F3JNmgx7F7\nAT9c2cFVtRx4qHs9L/Bu4KoVHAKtmexDm/NtAWwG/KxHn6XAdk2f7YDNexnnV8DWScY0T67Ya2X1\nSpIkjUQjYea3N5cB709yO62weD1AVT3Q/Fn/wiRrAb8F3tifAavq1iT/AlyV5GlgYZK/BR6vqj/0\ns67Dga80j2O7C3jvSvp/CfhykiW0Zm+PqKonutchNy4A3pPkVlrLL37eS+33JvkP4BbgbmBhP+uV\nJEkaUVLlX7ZfrCSHAZu0PSZtROnq6qr58+cPdhmSJEkrlWRBVXWtrN9Infl9SVTVNwa7BkmSJPXf\nSF3zK0mSJL2A4VeSJEkdw/ArSZKkjmH4lSRJUscw/EqSJKljGH4lSZLUMQy/kiRJ6hiGX0mSJHUM\nw68kSZI6huFXkiRJHcPwK0mSpI5h+JUkSVLHMPxKkiSpYxh+JUmS1DEMv5IkSeoYhl9JkiR1DMOv\nJEmSOobhV5IkSR3D8CtJkqSOMXqwC9AQ9puFMHt8//rOXr5ma5EkSRoAzvxKkiSpYxh+JUmS1DEM\nv5IkSeoYIz78Jnk6yaK2n+MHuZ7jkxyaZHaSSvLf2/Yd17
R1rcJ4uye5pI99XUk+18e+pUk2XPUr\nkCRJGr464QNvj1XVtBdzYJLRVfXUANfzZuAgYDKwBDgY+FSz7x3ArQNxkqb2+cD8gRhPkiRpJBjx\nM799aZ/5bGZIr2y2Zyc5J8m1wDlJxiaZk2RJkoVJ9mj6HZHke0muTPKLJCe0jX1YkhuameavJhnV\ntI8DXl5VDzRdLwLe2ux7HbAceLBtnC8nmZ/k1iQntrXvk+SOJDcBb29r71n7s7PCSTZIcnkz1llA\nBvo9lSRJGuo6Ifyu3WPZwzv7cczWwN5VdQhwNFBVNRU4BPh6krFNvx2BA4BtgXc0IXor4J3AzGbG\n+Wng0Kb/3sCP2s7ze+DeJFNozQCf36OOT1ZVVzP+G5Js25z7TOB/ANsD/20Ftbc7AbimqrYBvgts\n1o/3QZIkaURx2UPvLq6qx5rtXYHPA1TVHUl+BWzR7LuiqpYBJLmw6fsUrVB6YxKAtYHfNv33Aeb0\nONd5tILvm4G9gPe27TsoyVG07tNEWsF2LeDuqvpFc95vAEf1UXu73Whmiavq0iQP9XbhzfmOAhg1\nbiMmPd6z3OdbetJ+K9wvSZI0lHRC+O3LUzw38z22x75H+zlG9fI6wNer6hO99N8R+ECPtkuAk4H5\nVfX7JjCTZHPgY8AOVfVQkrN7qbM3/a29V1V1BnAGwJiJk3tenyRJ0rDWCcse+rKU1gwttJYu9GUu\nzbKFJFvQWi7ws2bfG5NMSLI2sD9wLa1lDQcm+bPmmAlJXpNkG+COqnq6ffCq+iPwceBfepx3HK0g\nuzzJxsC+TfsdwKRmjTC0lmL0x9XAu5qa9gXW7+dxkiRJI0YnzPyunWRR2+vLqup44ETga0n+F3Dl\nCo7/EvDlJEtozRYfUVVPNDO0NwAXAJsA32ierkCSfwAuT7IW8CStdcO7Apf1doKqOq+XtpuTLKQV\ndu+lFaypqsebpQmXJvkjrXC+bj/ehxOBbyW5FZgH3NOPYyRJkkaUVPmX7RcjyRFAV1V9sJ/9rwDe\nU1X3r9HCBtCYiZNr4uGnrbCPa34lSdJQkGRB86CAFeqEmd8hoareONg1SJIkdTrD74tUVWcDZw9y\nGZIkSVoFnfyBN0mSJHUYZ37Vp6mvHs981/RKkqQRxJlfSZIkdQzDryRJkjqGyx4kSZL68OSTT3Lf\nfffx+OOPD3YpaowdO5ZNNtmEl73sZS/qeMOvJElSH+677z7WXXddJk2aRPMFVxpEVcWyZcu47777\n2HzzzV/UGC57kCRJ6sPjjz/OBhtsYPAdIpKwwQYbrNZMvOFXkiRpBQy+Q8vq3g/DryRJ0hCWhI9+\n9KPPvj7llFOYPXv2Co+56KKLuO22217Q/vDDD7PBBhtQVQBcd911JOG+++4DYPny5UyYMIFnnnmG\nf/qnf+KHP/zhKtU6adIkHnzwwVU65qXmml9JkqR+mnT8pQM63tJ+PE9/zJgxXHjhhXziE59gww03\n7Ne4F110EW95y1vYeuutn9e+3nrrMXHiRG6//Xa23npr5s2bx/Tp05k3bx4HHXQQ119/PTvuuCNr\nrbUW//zP//yirmmoc+ZXkiRpCBs9ejRHHXUUp5566gv2LV26lD333JNtt92Wvfbai3vuuYd58+Zx\n8cUX87d/+7dMmzaNO++883nHzJgxg3nz5gEwb948PvzhDz/v9cyZMwE44ogj+M53vgO0ZnRPOOEE\ntttuO6ZOncodd9wBwLJly3jTm97ENttsw5FHHvnsjDLAZz/7WaZMmcKUKVM47bTTADj55JP53Oc+\nB8CHP/xh9txzTwB+/OMfc+ihh/L0009zxBFHMGXKFKZOndrrNa8uw68kSdIQd/TRR3PuueeyfPny\n57Ufc8wxHH744SxevJhDDz2UY489lhkzZvCXf/mXnHzyySxatIjXve51zztm5syZz4bdu+66i3e8\n4x3Mnz8faIXfGTNm9F
rDhhtuyE033cQHPvABTjnlFABOPPFEdt11V2699Vbe9ra3cc899wCwYMEC\n5syZw09/+lOuv/56zjzzTBYuXMisWbOYO3cuAPPnz+eRRx7hySefZO7cuey2224sWrSIX//619xy\nyy0sWbKE9773vQP3JjYMv5IkSUPcuHHjeM973vPsrGm36667jne9610AvPvd7+aaa65Z6VjdM793\n3303kyZNYuzYsVQVjzzyCAsWLGCnnXbq9bi3v/3tAGy//fYsXboUgKuvvprDDjsMgP3224/1118f\ngGuuuYa3ve1tvPKVr2Sdddbh7W9/O3PnzmX77bdnwYIF/P73v2fMmDHssssuzJ8/n7lz5zJr1ixe\n+9rXctddd3HMMcdw2WWXMW7cuBf1fq2I4VeSJGkYOO644/ja177Go48+ulrjTJ48mYcffpj//M//\nZJdddgFagXbOnDlMmjSJddZZp9fjxowZA8CoUaN46qmnXtS5X/ayl7H55ptz9tlnM2PGDGbNmsVP\nfvITfvnLX7LVVlux/vrrc/PNN7P77rvzla98hSOPPPLFXeQKGH4lSZKGgQkTJnDQQQfxta997dm2\nGTNmcN555wFw7rnnMmvWLADWXXdd/vCHP/Q51s4778zpp5/+bPjdZZddOO20055d79tfu+22G9/8\n5jcB+P73v89DDz0EwKxZs7jooov44x//yKOPPsp3v/vdZ2ubNWsWp5xyCrvtthuzZs3iK1/5CtOn\nTycJDz74IM888wwHHHAAn/rUp7jppptWqZ7+MPxKkiQNEx/96Eef9yixz3/+88yZM4dtt92Wc845\nh9NPPx2Agw8+mJNPPpnp06e/4ANv0Fr3e++999LV1QW0wu9dd93V53rfvpxwwglcffXVbLPNNlx4\n4YVsttlmAGy33XYcccQR7Ljjjuy0004ceeSRTJ8+HWiF3/vvv59ddtmFjTfemLFjxz4bjH/961+z\n++67M23aNA477DA+85nPrPqbtBJp/1Se1K6rq6u6F8BLktSJbr/9drbaaqvBLkM99HZfkiyoqq6V\nHetzftW33yyE2eP713f28pX3kSRJGmQue5AkSVLHMPxKkiSpYxh+JUmS1DEMv5IkSeoYQyb8Jtk/\nSSXZcg2N35Xkcyvv2efxByf5ZJKNk1yS5OYktyX5vwNc59NJFiW5Jcm3k7ziRY4zO8nHBrI2SZKk\n4W7IhF/gEOCa5veASjK6quZX1bGrMcy+wGXAPwNXVNXrq2pr4PgBKfI5j1XVtKqaAvwJeP8Ajy9J\nkoaRD3/4w5x22mnPvn7zm9/8vG8+++hHP8qnP/1pDjzwwFUa9+yzz+aDH/zggNU5XAyJR50lWQfY\nFdgD+E/ghCS7AycCDwNTgf8AlgAfAtYG9q+qO5NsBHwF2KwZ7riqujbJbOB1wGuBe5J8FfhYVb2l\nOd/ngS6ggBOr6oIkXwZ2aMb/TlWd0NQXYBpwEzARuLy79qpa3HYN3wPWB14G/ENVfa/Z9xHgr5pD\nzqqq5/4LXrG5wLbNGBcBmwJjgdOr6oym/ZGqWqfZPhB4S1Ud0eP9nda8R68A7gT+qqoe6mcNkiSp\nW38fAdrv8Vb+qNCZM2fyH//xHxx33HE888wzPPjgg/z+979/dv+8efM49dRT+fu///uBrW2EGioz\nv28FLquqnwPLkmzftL+e1sznVsC7gS2qakfgLOCYps/pwKlVtQNwQLOv29bA3lXVczb5H4HlVTW1\nqrYFfty0f7J5OPK2wBuSbNu0TwdurtY3gnwR+FqSnzTLIF7V9HkceFtVbUcrxP9bWrYH3gvsBOwM\n/HWS6St7Q5KMpjXbvKRp+quq2p5WYD82yQYrG6PNvwMfb651CXDCCs57VJL5SeYvenQDJj3+zV5/\nmL38+T+SJGmNmDFjBtdddx0At956K1OmTGHdddfloYce4oknnuD2229nwoQJTJkyBWjN
6L797W9n\nn332YfLkyfzd3/3ds2PNmTOHLbbYgh133JFrr7322falS5ey5557su2227LXXntxzz338PTTT7P5\n5ptTVTz88MOMGjWKq6++Gmh9rfEvfvELrrrqKqZNm8a0adOYPn36Cr9SeagYKuH3EOC8Zvs8nlv6\ncGNV3V9VT9CaseyecV0CTGq29wa+kGQRcDEwrpmFBbi4qh7r5Xx70wqxALTNgh6U5CZgIbANrfAM\nsA/w/abvD2jNJp8JbAksbGafA3w6yWLgh8CrgY1pzWh/t6oerapHgAuBWSt4L9ZurmU+cA/Q/QXe\nxya5Gbie1gzw5BWM8awk44H1quqqpunrwG599a+qM6qqq6q6Rr1igP91K0mSVtmrXvUqRo8ezT33\n3MO8efPYZZdd2GmnnbjuuuuYP38+U6dO5eUvf/nzjlm0aBHnn38+S5Ys4fzzz+fee+/l/vvv54QT\nTuDaa6/lmmuu4bbbbnu2/zHHHMPhhx/O4sWLOfTQQzn22GMZNWoUf/7nf85tt93GNddcw3bbbcfc\nuXN54oknuPfee5k8eTKnnHIKX/ziF1m0aBFz585l7bXXfqnfnlU26MsekkwA9gSmJilgFK2lCJcC\nT7R1fabt9TM8V/tawM5V9XiPcQEeXYU6Ngc+BuxQVQ8lOZvWEgOAN9GaVQagqn4HfBP4ZpJLaIXJ\ndYGNgO2r6skkS9uOXxWPVdW0HrXtTiuw71JVf0xyZdvY7d9P/WLOJ0mShrgZM2Ywb9485s2bx0c+\n8hF+/etfM2/ePMaPH8/MmTNf0H+vvfZi/PjWJNbWW2/Nr371Kx588EF23313NtpoIwDe+c538vOf\n/xyA6667jgsvvBCAd7/73c/OFs+aNYurr76au+++m0984hOceeaZvOENb2CHHXYAWksyPvKRj3Do\noYfy9re/nU022WSNvxerayjM/B4InFNVr6mqSVW1KXA3K54dbXc5zy2B6F7fujJXAEe3HbM+MI5W\nWF6eZGNaSw66Z05HV9Wy5vWe3U9gSLIurXXF9wDjgd82wXcP4DXN8HOB/ZO8Iskrgbc1batiPPBQ\nE3y3pLV8ott/JdkqyVrN2M9TVcuBh5J0v5/vBq7q2U+SJA1dM2fOZN68eSxZsoQpU6aw8847c911\n1zFv3jxmzJjxgv5jxox5dnvUqFE89dRTL+q8u+22G3PnzuWGG27gL/7iL3j44Ye58sormTWrFSuO\nP/54zjrrLB577DFmzpzJHXfc8eIu8CU0FMLvIcB3e7RdQP+f+nAs0JVkcZLb6N/TET4FrN88Tuxm\nYI+qupnWcoc7aM3qdi+EeSOtZQzdtgfmN8sbrqP1AbYbgXObOpYA72nGoapuAs4GbgB+2vRf2M9r\n63YZMDrJ7cBJtJY+dDseuASYB9zfx/GHAyc3NU+j9cQKSZI0TMyYMYNLLrmECRMmMGrUKCZMmMDD\nDz/Mdddd12v47c1OO+3EVVddxbJly3jyySf59re//bzxzzuvtQL13HPPfTbc7rjjjsybN4+11lqL\nsWPHMm3aNL761a+y226tFZR33nknU6dO5eMf/zg77LDDsAi/g77soar26KXtc8DnerTt3rZ9JXBl\ns/0g8M5expjd43X7MY/QCoQ9jzmiZ1uSs2j7EF1VnQyc3MuxDwK79Gxv9n0W+Gxv+3rpu04vbU/Q\nzET3su87wHd6aZ/dtr2I588WS5KkYWTq1Kk8+OCDvOtd73pe2yOPPMKGG27II488stIxJk6cyOzZ\ns9lll11Yb731mDbtuT+Wf/7zn+e9730vJ598MhtttBFz5swBWjPIm266KTvv3IoRs2bN4lvf+hZT\np04F4LTTTuMnP/kJa621Fttssw377ttrXBlS0nqAgfRCYyZOromH9/5UtqUn7fcSVyNJ0kvv9ttv\nZ6utthrsMtRDb/clyYLmqV0rNOgzv52oeUzZj3rZ
tVf32mJJkiQNPMPvIGgCbn8+mCdJkqQBZPhV\nn6a+ejzzXd4gSZJGkKHwtAdJkqQhy89HDS2rez8Mv5IkSX0YO3Ysy5YtMwAPEVXFsmXLGDv2xX+v\nl8seJEmS+rDJJptw33338cADDwx2KWqMHTt2tb5JzvArSZLUh5e97GVsvvnmg12GBpDLHiRJktQx\nDL+SJEnqGIZfSZIkdQy/3lh9SvIH4GeDXYcGzIbAg4NdhAaU93Rk8X6OPN7Tl9ZrqmqjlXXyA29a\nkZ/15zuyNTwkme/9HFm8pyOL93Pk8Z4OTS57kCRJUscw/EqSJKljGH61ImcMdgEaUN7Pkcd7OrJ4\nP0ce7+kQ5AfeJEmS1DGc+ZUkSVLHMPzqBZLsk+RnSX6Z5PjBrkcrl2TTJD9JcluSW5N8qGmfkOSK\nJL9ofq/fdswnmnv8syRvHrzqtSJJRiVZmOSS5rX3dJhKsl6S7yS5I8ntSXbxfg5vST7c/G/uLUm+\nlWSs93ToM/zqeZKMAr4I7AtsDRySZOvBrUr98BTw0araGtgZOLq5b8cDP6qqycCPmtc0+w4GtgH2\nAb7U3HsNPR8Cbm977T0dvk4HLquqLYHX07qv3s9hKsmrgWOBrqqaAoyidc+8p0Oc4Vc97Qj8sqru\nqqo/AecBbx3kmrQSVXV/Vd3UbP+B1v+pvprWvft60+3rwP7N9luB86rqiaq6G/glrXuvISTJJsB+\nwFltzd7TYSjJeGA34GsAVfWnqnoY7+dwNxpYO8lo4BXAb/CeDnmGX/X0auDettf3NW0aJpJMAqYD\nPwU2rqr7m13/D9i42fY+Dw+nAX8HPNPW5j0dnjYHHgDmNMtYzkrySryfw1ZV/Ro4BbgHuB9YXlWX\n4z0d8gy/0giSZB3gAuC4qvp9+75qPdrFx7sME0neAvy2qhb01cd7OqyMBrYDvlxV04FHaf4c3s37\nObw0a3nfSusfNq8CXpnksPY+3tOhyfCrnn4NbNr2epOmTUNckpfRCr7nVtWFTfN/JZnY7J8I/LZp\n9z4PfTOBv0yylNbyoz2TfAPv6XB1H3BfVf30/7d3x6hVRHEUxr8DoiAuIYUpglsQLZM1iIUSxFJd\ngK7AyiXY2YQgmM7GXgUVJNopaiO4BCPH4k4RbB5Y5L1hvl81c6f5w+G9OQx3mOn8kFGGzXO+9oCv\nbX+1/Q08B65hphvP8qt/vQV2kmwnOc/YnH+05pm0QpIw9hJ+bvvk1KUjYH863gdenFq/meRCkm1g\nB3hzVvNqtbYP2261vcz4Hb5qewsznaW2P4EfSa5MS7vAJ8xzzr4DV5NcnP6DdxnvW5jphju37gG0\nWdqeJLkPvGS8ufq07fGax9Jq14HbwMckH6a1R8Bj4CDJXeAbcAOg7XGSA8bN9wS41/bP2Y+t/2Cm\n8/UAeDY9WPgC3GE8hDLPGWr7Oskh8I6R0XvGF90uYaYbzS+8SZIkaTHc9iBJkqTFsPxKkiRpMSy/\nkiRJWgzLryRJkhbD8itJkqTFsPxKkiRpMSy/kiRJWgzLryRJkhbjLyjvi/7D6/7gAAAAAElFTkSu\nQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11d7858d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Horizontal bar chart of the subset, drawn from both count columns.\n",
    "count_subset.plot.barh()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-25T23:52:34.496407Z",
     "start_time": "2018-12-25T23:52:34.102104Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x11d765350>"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAr8AAAFpCAYAAACVlkBBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XmUXWWd7//3h0RSKBCZ2hsBCdpBhiQESJhCkFFA7BYF\nkUEUW5vWa8NF225xuoa+9q/xqpfBAUVsUBxABZSGK4IDEAhTBTIwOUEUkNtKhIBMQvj+/jg7UBSp\npDJUqlL7/VqrVu3z7Gc/+7vPXrA+eeo5+6SqkCRJktpgrcEuQJIkSVpdDL+SJElqDcOvJEmSWsPw\nK0mSpNYw/EqSJKk1DL+SJElqDcOvJEmSWsPwK0mSpNYw/EqSJKk1DL+SJElqjZGDXYCGro033rjG\njh072GVIkiQt06xZsx6sqk2W1c/wqz6NHTuW7u7uwS5DkiRpmZL8tj/9XPYgSZKk1jD8SpIkqTUM\nv5IkSWoN1/xKkiT14emnn+a+++7jySefHOxS1Ojq6mKzzTbjJS95yQodb/iVJEnqw3333cd6663H\n2LFjSTLY5bReVbFgwQLuu+8+ttxyyxUaw2UPkiRJfXjyySfZaKONDL5DRBI22mijlZqJd+ZXfZp3\n/0LGnnTZYJehNdT8rqMGuwRJWnkHfJc84JKHFfbKHVb5kCv7DxFnfiVJkoawbLoj/3Ty/3nu9We/\n/A2mf+7LSz3mB5f/nDt+efeL2h9e+Cgbbbc3VQXA9d1zyKY7ct/v/wuAhY88yobb7cWzzz7L//zM\nmfzkmhuXq9axuxzMg396aLmOWd2c+ZUkSeqnsWf8fpWON/+EVy6zz6hRa3PRj37GR45/FxtvuEG/\nxv3B5Vfxxv2mse1Wr35B+8tHr8eYV2zMnb+6h223ejUzu+eyw/itmdk9h8P/9vXccMs8dp60HWut\ntRb/+s/vW6FrGuqc+ZUkSRrCRo4YwXFHv4VTz/rWi/bNv/f37PPW45i43+Hse/g/8Lv7H2DmzXO4\n5Mqr+edPncak/Y/gN/PvfcExu0+eyMzuOQDMnDWHD/z9Uc+/7p7D1MmTADj2xE/y/Ut/AnRmdD/5\n2TPZ8YCjmLDv4dz163sAWPCnh3n9kf+d7fY+jPd86F+fm1EG+D9f+Sbjx49n/PjxnHbaaQB85jOf\n4YwzzgDgAx/4APvssw8AP/vZzzj66KNZtGgRxx57LOPHj2fChAmceuqpq+x9XMzwK0mSNMS9/9jD\n+dbFP2LhI4++oP34j3+ad771b5j7k+9y9FsO4oRPfIbdp2zP3+7/Oj7z8ROZfeX5vGbs5i84Zurk\nSc+F3bt/ez9vfeP+dM+9E4CZ3XPZffLEJdaw8YYbcMuPv837jjmMz375PABOPvUs9th5Erf//Pu8\n+cC9+d39/w+AWXPv4JzvXsKNN97IDTfcwFe/+lVuvfVWpk2bxowZMwDo7u7mz3/+M08//TQzZsxg\nzz33ZPbs2dx///3cdtttzJs3j3e9612r7k1sDLnwm+SQJJVk6wEaf3KSM1bi+COSfCzJsUmeTTKx\nx77bkoxdFXX2GPPvk1zQ4/X6SX6T5NVLO67XGN9McsiqrEuSJK0+66+3Lu847I2c8bXzX9B+/ax5\nHPXmAwE45tCDufam2csca/fJ2zOzey73/O5+xm7+Srq6RlFV/Pmxx5k170522XHCEo97y0GdWdqd\nJm7D/Hs7yz+uueEW3v6WNwBw8H7T2ODl6wNw7U2zefOBe/Oyl72Mddddl7e85S3MmDGDnXbaiVmz\nZvHII48watQodtttN7q7u5kxYwbTpk3j1a9+NXfffTfHH388l19+Oeuvv/6KvWFLMeTCL3AkcG3z\ne5VKMrKquqvqhJUY5iDg8mb7PuBjK1/ZUp0NbJ5kv+b1vwL/UVUvXsW+BElc1y1J0jBw4nuO4mvn\n/4DHHn9ipcYZ9+pX8fAjj/KfV17Dbj
t1gu5OE7fhnAsuYexmr2Tdl710iceNGtX5UokRI0bwzKJF\nK3Tul7zkJWy55Zace+657L777kybNo2f//zn/PrXv2abbbZhgw02YM6cOey11158+ctf5j3vec+K\nXeRSDKnwm2RdYA/g3cARTdteSa5O8sMkdyc5JcnRSW5KMi/Ja5p+myS5MMnNzc/Upn16kvOSXAec\n14x36eLzJTmnGWdukkOb9jOTdCe5PcnJPeoLMAm4pWm6FNguyWuXcC2vT3J9kluSfK8515QkFzX7\n35TkiSRrJ+lKssQwW53FM+8FTksyGdgX+Ewzxo5JbmxqvzDJ6Kb92iSnJukG/rFXXf+e5GtJhtS9\nlyRJS7fhBqM5/G/252vf+eFzbbtPnsj5P/wxAN+66EdM26XzaLH11n0pjz72WJ9j7brjBE7/2nfY\nbafOH7B322kip539baZO2X65atpz1x359sWdOcEf/ew6Hnr4EQCm7bIDP/jxz3n88cd57LHHuPji\ni5k2bVpn37RpfPazn2XPPfdk2rRpfPnLX2aHHXYgCQ8++CDPPvsshx56KJ/61Ke45ZZb+jz3ihpq\nAehNwOVV9UtgQZKdmvbt6QTAbYBjgK2qamc6s6LHN31OB06tqinAoc2+xbYF9quq3rPJnwAWVtWE\nqpoI/Kxp/1hVTQYmAq/rsbRhB2BOPb+a+1ngfwMf7Tloko2Bjzfn3BHoBj4I3EonPANMA24DpgC7\nAH0+S6Sq5gI/Bn4KHF9Vf2l2fRP4YFP7L5rrWWxEVU2uqtN61HUqsD7wnqp6dknnSnJcE/y7Fz2+\nsK+SJEnSIPinfziGB//08HOvP/+pf+GcCy5h4n6Hc96Fl3H6v34IgCPedACfOfMb7PD6I1/0gTeA\nqVO2597f/z8mT9wW6ITfu397H7tPXr7w+8kPHMc1N97CdnsfxkU/+hmv2vS/AbDjhG049q1/y847\n78wuu+zCe97zHnbYoRPMp02bxgMPPMBuu+3GK17xCrq6up4Lxvfffz977bUXkyZN4u1vfzv//u//\nvvxv0jKk56fyBlszI3t6VV2Z5ATgVXRmVz9WVfs3fa4BPlJV1yXZBzihqg5J8geg5/NHNgFeC3yI\nzgTqyc3xewEfqqo3JpkFHFFVv+pVx3uB4+g8Cm4MncB5fpKPAvdU1XeSHAtMBk4EbgcOBP4TeCMw\nHjiXzrIIgLWB66vq3UmuBE4AvgKcCYwFRgB/qqovLeW9eTVwaVVt27zeCLi5ql7dvH4tcF5V7Zzk\nWuDDVXVds++bwATguqr678u4Dc8ZNWZcjXnnacvuKC2BX3IhaTi484Dvss0WfzXYZay5BuBLLgDu\nvPNOttlmmxe0JZnVTF4u1ZBZD5pkQ2AfYEKSohMIC7gMeKpH12d7vH6W569hLWDXqnrB17A03wLS\n97z/i+vYkk5gnlJVDyU5F+hqdr+ezqzyc6rqmSSfAz7ccxjgyiXMNANcQ2fd8NPAT+iE5BHAPy+j\ntGebn/7qfc03AVOSbFBVQ/vp05IkSQNkKC17OIzOzOUWVTW2qjYH7qGzPKA/ruD5JRAkmbSUvotd\nCby/xzEb0FkW8BiwMMkr6ARVmvW0I6tqwRLGORfYj85sM8ANwNQkf90c+7IkWzX7ZtCZLb6+qv4I\nbERnhvq2/l1mR1PHE0l2b5qOAa5eyiGXAZ8DLm3WVkuSJLXOUAq/RwIX92q7kP4/9eEEYHLz4a87\n6KwRXpZPARs0jyibA+xdVXPorM29C/g2cF3Td386M7Uv0qzBPQP4q+b1H4Fjge8kmQtcDyx+dNuN\nwCvozAADzAXm9VhHvDyOAU5tzrFtcz19qqrz6QT1HybpWlpfSZKk4WhIrfkdypKcDZxdVTcMdi2r\ni2t+tTJc8ytpOHDN70pyze+aq6pW/YPmJEmStFoZfoeQJF8EpvZqPr2qzhmMeiRJkoYbw+8QUlXv\nX3
av1WfCpqPpPuXgwS5DayyfEy1pGLjzTnjlNsvuN4A+8IEPsMUWW3DiiScCcMABB7D55ptz9tmd\nrzT4p3/6JzbaaCNuueUWvv/97/d73HPPPZfu7m6+8IUvDEjdQ5XhV5Ikqb+mj17F4y17omDq1Kl8\n97vf5cQTT+TZZ5/lwQcf5JFHHnlu/8yZMzn11FP56Ec/upRRtNhQetqDJEmSetl99925/vrrAbj9\n9tsZP3486623Hg899BBPPfUUd955JxtuuCHjx48HOjO6b3nLWzjwwAMZN24c//Iv//LcWOeccw5b\nbbUVO++8M9ddd91z7fPnz2efffZh4sSJ7Lvvvvzud79j0aJFbLnlllQVDz/8MCNGjOCaazoPq9pz\nzz351a9+xdVXX82kSZOYNGkSO+ywA48++uhqfGdWjOFXkiRpCHvlK1/JyJEj+d3vfsfMmTPZbbfd\n2GWXXbj++uvp7u5mwoQJrL322i84Zvbs2VxwwQXMmzePCy64gHvvvZcHHniAT37yk1x33XVce+21\n3HHHHc/1P/7443nnO9/J3LlzOfrooznhhBMYMWIEr33ta7njjju49tpr2XHHHZkxYwZPPfUU9957\nL+PGjeOzn/0sX/ziF5k9ezYzZsxgnXXWWd1vz3Iz/EqSJA1xu+++OzNnznwu/O62227PvZ46tfdn\n5WHfffdl9OjRdHV1se222/Lb3/6WG2+8kb322otNNtmEtddem7e97W3P9b/++us56qjOIyqPOeYY\nrr32WgCmTZvGNddcwzXXXMNHPvIRrr32Wm6++WamTJkCdJZkfPCDH+SMM87g4YcfZuTIob+i1vAr\nSZI0xE2dOpWZM2cyb948xo8fz6677sr111/PzJkz2X333V/Uf9SoUc9tjxgxgmeeeWaFzrvnnnsy\nY8YMbrrpJt7whjfw8MMPc9VVVzFtWucLeE866STOPvtsnnjiCaZOncpdd921Yhe4Ghl+JUmShrjd\nd9+dSy+9lA033JARI0aw4YYb8vDDD3P99dcvMfwuyS677MLVV1/NggULePrpp/ne9773gvHPP/98\nAL71rW89F2533nlnZs6cyVprrUVXVxeTJk3iK1/5CnvuuScAv/nNb5gwYQIf/vCHmTJliuFXkiRJ\nK2/ChAk8+OCD7Lrrri9oGz16NBtvvHG/xhgzZgzTp09nt912Y+rUqS/4hrTPf/7znHPOOUycOJHz\nzjuP008/HejMIG+++ebPnXfatGk8+uijTJgwAYDTTjuN8ePHM3HiRF7ykpdw0EEHrapLHjB+vbH6\nNHny5Oru7h7sMiRJGjRL+hpdDb6V+XpjZ34lSZLUGoZfSZIktYbhV5IkSa1h+JUkSVoKPx81tKzs\n/TD8SpIk9aGrq4sFCxYYgIeIqmLBggV0dXWt8BhD/2s4JEmSBslmm23Gfffdxx//+MfBLkWNrq4u\nNttssxU+3vArSZLUh5e85CVsueWWg12GViHDr/o07/6FjD3pssEuQ9IAmd911GCXIGm4m75wsCt4\nEdf8SpIkqTUMv5IkSWoNw68kSZJaw/ArSZKk1jD8SpIkqTUMv5IkSWoNw68kSZJaw/ArSZKk1jD8\n6gWSHJekO0n3oseH3oOpJUmSVobhVy9QVWdV1eSqmjzipaMHuxxJkqRVyvArSZKk1jD8SpIkqTUM\nv5IkSWoNw68kSZJaw/ArSZKk1jD8SpIkqTVGDnYBGrombDqa7lMOHuwyJA0Yn+UtqX2c+ZUkSVJr\nGH4lSZLUGoZfSZIktYbhV5IkSa1h+JUkSVJrGH4lSZLUGoZfSZIktYbhV5IkSa1h+JUkSVJrGH4l\nSZLUGoZfSZIktYbhV5IkSa1h+JUkSVJrGH4lSZLUGoZfSZIktcbIwS5AQ9e8+xcy9qTLBrsMDVPz\nu44a7BIkSQNt+sLBruBFnPmVJElSaxh+JUmS1BqGX0mSJLXGGhl+
kxySpJJsPUDjT05yxkocf0SS\njzXbByXpTnJHkluTfK5pPzfJYUs49pVJvr/i1UuSJKkva2T4BY4Erm1+r1JJRlZVd1WdsBLDHARc\nnmQ88AXg7VW1LTAZ+PXSDqyq31fVi0KxJEmSVt4aF36TrAvsAbwbOKJp2yvJ1Ul+mOTuJKckOTrJ\nTUnmJXlN02+TJBcmubn5mdq0T09yXpLrgPOa8S5dfL4k5zTjzE1yaNN+ZjOje3uSk3vUF2AScAvw\nL8C/VdVdAFW1qKrO7HE5eyaZ2dR8WHP82CS3Ndsjknw2yW3NuY9v2v9nU/9tSc5qzkmSKU2/2Uk+\n02Ocrh7XcGuSvQfk5kiSJA1xa1z4Bd4EXF5VvwQWJNmpad8eeC+wDXAMsFVV7QycDRzf9DkdOLWq\npgCHNvsW2xbYr6p6zyZ/AlhYVROqaiLws6b9Y1U1GZgIvC7JxKZ9B2BOVRUwHpi1lGsZQyfIvxE4\nZQn7jwPGApOac3+raf9CVU2pqvHAOs3xAOcA/1BVk4BFPcZ5P1BVNYHObPnXk3QtpS5JkqRhaU0M\nv0cC5zfb5/P80oebq+qBqnoK+A1wRdM+j06ABNgP+EKS2cAlwPrNTDLAJVX1xBLOtx/wxcUvquqh\nZvPwJLcAtwLb0QnPAAcCP+rntfygqp6tqjuAV/Rx7q9U1TPNuf/UtO+d5MYk84B9gO2SvBxYr6qu\nb/p8u8c4ewDfbMa4C/gtsNWSCkpyXDOj3b3o8aH3bD5JkqSVsUZ9yUWSDemEvQlJChgBFHAZ8FSP\nrs/2eP0sz1/nWsCuVfVkr3EBHluOOrYEPgRMqaqHkpwLLJ5JfT2dWWWA24GdgDl9DNWz5vTz3F3A\nl4DJVXVvkuk9zr3Squos4CyAUWPG1aoaV5IkaShY02Z+DwPOq6otqmpsVW0O3ANM6+fxV/D8EgiS\nTOrHMVfSWTaw+JgNgPXphOWFSV5B5wNuJBkNjKyqBU33zwAfTbJVs3+tJO/tZ62Lz/0PSUY2x2/I\n80H3wWbW+jCAqnoYeDTJLs3+I3qMMwM4uhljK+BVwC+Wow5JkqRhYU0Lv0cCF/dqu5D+P/XhBGBy\n86GwO+isEV6WTwEbNB8umwPsXVVz6Cx3uIvO8oLrmr77Az9ZfGBVzQVOBL6T5E7gNuDV/awVOmuS\nfwfMbc59VBNyv9qM9WPg5h793w18tVnW8TJg8bqFLwFrNcskLgCObZaHSJIktUo6n8vSqpDkbODs\nqrphkM6/blX9udk+CRhTVf9jRccbNWZcjXnnaausPqmn+V1HDXYJkqSBNn31fX4oyazmYQRLtUat\n+R3qquo9g1zCwUk+Que+/hY4dnDLkSRJGloMv8NIVV1AZ1mDJEmSlmBNW/MrSZIkrTBnftWnCZuO\npvuUgwe7DA1bPkdakrT6OfMrSZKk1jD8SpIkqTUMv5IkSWoNw68kSZJaw/ArSZKk1jD8SpIkqTUM\nv5IkSWoNw68kSZJaw/ArSZKk1jD8SpIkqTUMv5IkSWoNw68kSZJaw/ArSZKk1jD8SpIkqTUMv5Ik\nSWoNw68kSZJaY+RgF6Cha979Cxl70mWDXYa0SszvOmqwS5Ck9pm+cLAreBFnfiVJktQahl9JkiS1\nhuFXkiRJrWH4lSRJUmsMaPhNckiSSrL1AI0/OckZK3H8EUk+luTYJF9YlbX1OMfIJH9McspAjN+c\nY36SjQdqfEmSpOFioGd+jwSubX6vUklGVlV3VZ2wEsMcBFy+qmrqw/7AL4G3JskAn0uSJElLMWDh\nN8m6wB7Au4Ejmra9klyd5IdJ7k5ySpKjk9yUZF6S1zT9NklyYZKbm5+pTfv0JOcluQ44rxnv0sXn\nS3JOM87cJIc27Wcm6U5ye5KTe9QXYBJwy1Ku4chmvNuSfLppG5Hk3KZtXpIPLOOtOBI4HfgdsFuP\nsecnOTnJLc04W/e49iubes9O
8tvFs7pJ3t68V7OTfCXJiCXU/KI+K1CzJEnSsDSQM79vAi6vql8C\nC5Ls1LRvD7wX2AY4BtiqqnYGzgaOb/qcDpxaVVOAQ5t9i20L7FdVvWeTPwEsrKoJVTUR+FnT/rGq\nmgxMBF6XZGLTvgMwp6pqScUneSXwaWAfOiF5SpJDmu1Nq2p8VU0AzunrDUjSBewH/CfwHV48A/5g\nVe0InAl8qGn7JPCzqtoO+D7wqmasbYC3AVOrahKwCDi61/n66rM8NR/X/GOhe9HjQ+/ZfJIkSStj\nIMPvkcD5zfb5PB/8bq6qB6rqKeA3wBVN+zxgbLO9H/CFJLOBS4D1m5lkgEuq6oklnG8/4IuLX1TV\nQ83m4UluAW4FtqMTngEOBH60lPqnAFdV1R+r6hngW8CewN3Aq5N8PsmBwCNLGeONwM+bei8EDuk1\nW3tR83tWj2vfg+Z9q6rLgcXXsS+wE3Bz877sC7y61/n66tPvmqvqrKqaXFWTR7x09FIuTZIkac0z\nIN/wlmRDOjOmE5IUMAIo4DLgqR5dn+3x+tke9awF7FpVT/YaF+Cx5ahjSzozqlOq6qEk5wJdze7X\n05lVXi7NONsDB9CZwT4c+Ls+uh8J7JFkfvN6Izrvy5XN68XXvohl34sAX6+qj6xIn+WoWZIkadga\nqJnfw4DzqmqLqhpbVZsD9wDT+nn8FTy/BIIkk/pxzJXA+3scswGwPp2wvDDJK+h8wI0ko4GRVbVg\nKePdRGeZxMbNbO2RwNXN+tu1qupC4OPAjks6OMn6dK73Vc17MLapb1kf/ruOTjglyeuBDZr2nwKH\nJfmrZt+GSbbodewS+/S3ZkmSpOFuoMLvkcDFvdoupP9PfTgBmNx8cO0OOrOVy/IpYIPmQ11zgL2r\nag6d5Q53Ad+mEyyh8wSGn/Q6/tgk9y3+oTNbfRLwc2AOMKuqfghsClzVLCv4JtDXTOyb6azd7TnT\n/UPgb5KMWsp1nAy8PsltwFuB/wc8WlV30AmuVySZSyfsj+l54FL69LdmSZKkYS19fN5rWEtyNnB2\nVd0w2LX01gTjRVX1TJLdgDObD6+tdqPGjKsx7zxtME4trXLzu44a7BIkqX2mr74PzyeZ1TzkYKkG\nZM3vUFdV7xnsGpbiVcB3k6wF/AX4+0GuR5IkadhoZfhd1ZJ8EZjaq/n0qurzkWJ9qapf0XkMmyRJ\nklaxVi57UP9Mnjy5uru7B7sMSZKkZervsoeB/npjSZIkacgw/EqSJKk1DL+SJElqDcOvJEmSWsPw\nK0mSpNYw/EqSJKk1DL+SJElqDcOvJEmSWsPwK0mSpNYw/EqSJKk1DL+SJElqDcOvJEmSWsPwK0mS\npNYw/EqSJKk1DL+SJElqjZGDXYCGrnn3L2TsSZcNdhmSpOUwv+uowS5Bet70hYNdwYs48ytJkqTW\nMPxKkiSpNQy/kiRJao1hEX6THJKkkmw9QONPTnLGShx/RJKPJTk2yR+T3JrkV0l+nGT3VVmrJEmS\n+jYswi9wJHBt83uVSjKyqrqr6oSVGOYg4PJm+4Kq2qGqxgGnABcl2WalC11OSfywoyRJap01Pvwm\nWRfYA3g3cETTtleSq5P8MMndSU5JcnSSm5LMS/Kapt8mSS5McnPzM7Vpn57kvCTXAec14126+HxJ\nzmnGmZvk0Kb9zCTdSW5PcnKP+gJMAm7pXXtV/Rw4Cziu6fuaJJcnmZVkxuKZ7CTnJjkjyczmeg5r\n2s9PcnCPc52b5LAkI5J8prmmuUn+ocf7MiPJJcAdq/RGSJIkrQGGw+zfm4DLq+qXSRYk2alp3x7Y\nBvgTcDdwdlXtnOR/AMcDJwKnA6dW1bVJXgX8uDkGYFtgj6p6IslePc73CWBhVU0ASLJB0/6xqvpT\nkhHAT5NMrKq5wA7AnKqqTg5+kVuAf2i2zwLeW1W/SrIL8CVgn2bfGDohf2vgEuD7wAXA4cBlSd
YG\n9gXeR+cfAgurakqSUcB1Sa5oxtkRGF9V9/Tr3ZUkSRpGhkP4PZJOiAU4v3l9KXBzVT0AkOQ3wOLw\nNw/Yu9neD9i2Ryhdv5lJBrikqp5Ywvn2o5lhBqiqh5rNw5McR+c9HUMnPM8FDgR+tJT609S4LrA7\n8L0e9Yzq0e8HVfUscEeSVzRtPwJObwLugcA1TVh/PTBx8QwxMBoYB/wFuGlpwbe5huMARqy/yVLK\nliRJWvOs0eE3yYZ0ZkYnJClgBFDAZcBTPbo+2+P1szx/3WsBu1bVk73GBXhsOerYEvgQMKWqHkpy\nLtDV7H49cOhSDt8BuLOp5eGqmtRHv57XE4CqejLJVcABwNvohP/F+4+vqh/3qnMvlnFdVXUWnRlo\nRo0ZV0vrK0mStKZZ09f8HgacV1VbVNXYqtocuAeY1s/jr6CzBAKAJH0Fz56uBN7f45gNgPXphMqF\nzazsQc2+0cDIqlqwpIGSvI7OLOtXq+oR4J4kb232Jcn2/ajnAuBddK558Yfqfgy8L8lLmrG2SvKy\nfowlSZI0rK3p4fdI4OJebRfS/6c+nABMbj4Udgfw3n4c8ylggyS3JZkD7F1Vc4BbgbuAbwPXNX33\nB37S6/i3JZmd5JfAR4FDq+rOZt/RwLubcW+ns555Wa4AXgf8pKr+0rSdTecDbbckuQ34Cmv4LL8k\nSdKqkCr/sj1QkpxN54N2Nwx2LSti1JhxNeadpw12GZKk5TC/66jBLkF63vSFq+1USWZV1eRl9XM2\ncABV1XsGuwZJkiQ9b01f9iBJkiT1m+FXkiRJrWH4lSRJUmu45ld9mrDpaLpPOXjZHSVJQ8jq+4CR\ntCZy5leSJEmtYfiVJElSaxh+JUmS1BqGX0mSJLXGMsNvkq8lmdSrbfqAVSRJkiQNkP7M/B4AfD3J\nO3q0/e0A1SNJkiQNmP6E3z8AewJvTfLFJCOBDGxZkiRJ0qrXn/CbqlpYVX8D/BG4Chg9oFVJkiRJ\nA6A/4ffKxRtVNR34NHDPQBUkSZIkDZT+hN/9er6oqv8ENhmYciRJkqSB0+fXGyd5H/Dfgdckmdtj\n13rAdQNdmCRJkrSq9Rl+gW8DPwL+HTipR/ujVfWnAa1KkiRJGgB9ht+qWggsBI5cfeVIkiRJA8dv\neJMkSVJrGH4lSZLUGktb86uWm3f/QsaedNlglyFJK21+11GDXYLUTtMXDnYFL+LMryRJklrD8CtJ\nkqTWMPxKkiSpNYblmt8ki4B5PZrOr6pTBrGek4B7gXHAn6vqs6tw7LHApVU1flWNKUmSNFwNy/AL\nPFFVk1YJ9CFZAAAYCUlEQVTkwCQjq+qZVVzPAcDhdMKvJEmSBkmrlj0kmZ9k42Z7cpKrmu3pSc5L\nch1wXpKuJOckmZfk1iR7N/2OTfLDJFcl+VWST/YY++1JbkoyO8lXkoxo2tcH1q6qPy6lrg8mua35\nObFpG5vkziRfTXJ7kiuSrNPs2ynJnCRzgPf3GGdpdV+U5PKm7v+9at9ZSZKkNcNwDb/rNCF08c/b\n+nHMtsB+VXUknUBZVTWBzjfcfT1JV9NvZ+BQYCLw1iZEbwO8DZjazDgvAo5u+u8H/LSvkybZCXgX\nsAuwK/D3SXZodo8DvlhV2wEPN+cFOAc4vqq27zXc0uqe1NQ4AXhbks378Z5IkiQNKy57eN4lVfVE\ns70H8HmAqroryW+BrZp9V1bVAoAkFzV9nwF2Am5OArAO8Iem/4F0wmpf9gAurqrHeow5DbgEuKeq\nZjf9ZgFjk7wceHlVXdO0nwcc1I+6f9p8ZTVJ7gC2oLMO+QWSHAccBzBi/U2WUrYkSdKaZ7iG3748\nw/Oz3V299j3WzzFqCa8DfL2qPrKE/jsD7+t3hS/0VI/tRXRC9YrqPdYS731VnQWcBTBqzLje1ypJ\nkrRGG67LHvoyn84MLTy/hGBJZtAsW0iyFfAq4BfNvv2TbN
isvz0EuI7OsobDkvxVc8yGSbZIsh1w\nV1UtWsa5Dkny0iQvA97ctC1RVT0MPJxkj6bp6B67l1a3JElS6w3X8Nt7ze/ix5ydDJyepJvO7Gdf\nvgSslWQecAFwbFUtnjm9CbgQmAtcWFXdVXUH8HHgiiRzgSuBMXSWI1zea+yPJ7lv8U9V3QKc24x7\nI3B2Vd26jOt7F/DFJLPpzDr3p25JkqTWS5V/2e6vJMcCk6vqH/vZ/0rgHVX1wIAWNkBGjRlXY955\n2mCXIUkrbX7XUYNdgtRO0xeutlMlmVVVk5fVr21rflerqtp/sGuQJEnS8wy/y6GqzqWzREGSJElr\noOG65leSJEl6EWd+1acJm46m+5SDB7sMSVoFVt+6Q0lDmzO/kiRJag3DryRJklrD8CtJkqTWMPxK\nkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSp\nNQy/kiRJag3DryRJklrD8CtJkqTWGDnYBWjomnf/QsaedNlqO9/8rqNW27kkSdJqMH3hYFfwIs78\nSpIkqTUMv5IkSWoNw68kSZJaw/ArSZKk1jD8LqckhySpJFsvo9//TfLyfox3UpKPJZnd/CzqsX3C\nUo77ZpJDVuQaJEmS2sqnPSy/I4Frm9+f7KtTVb2hn+MdABxeVf8GkOTPVTVppauUJEnSizjzuxyS\nrAvsAbwbOKJpG5Pkmmam9rYk05r2+Uk2brZ/kGRWktuTHNdjvPWBtavqj0s555ZJfp5kbpIrk2y2\nhD7/nuRrSV6f5Ps92g9K8r1m++1J5jU1/n+r5h2RJElasxh+l8+bgMur6pfAgiQ7AUcBP25ma7cH\nZi/huL+rqp2AycAJSTZq2vcDfrqMc34JOLuqJgLfA07ruTPJqcD6wHuAnwATe4z/LuA/msD8KWBv\nYAdgapI3Lsd1S5IkDQuG3+VzJHB+s31+8/pm4F1JpgMTqurRJRx3QpI5wA3A5sC4pv1A4EfLOOcu\nPc75DWBaj30nA6Oq6v3V8SzwLeCoJBsCOwFXNGP8rKoerKqngW8Dey7pZEmOS9KdpHvR40PvwdSS\nJEkrwzW//dSEyX2ACUkKGAEU8M90guTBwLlJ/k9VfaPHcXvRmeHdraoeT3IV0NXs3hl430qUdRMw\nJckGVfVQ0/YfwIXN9gVVtShJvwesqrOAswBGjRlXK1GbJEnSkOPMb/8dBpxXVVtU1diq2hy4h07w\n/a+q+ipwNrBjr+NGAw81wXdrYFeAJNsBd1XVomWc9wbg8Gb77cA1PfZdBnwOuLRZj0xV3Qs8CJwE\nnNv0uxHYO8lGSUbSWa989XJdvSRJ0jDgzG//HQl8ulfbhXQC5mNJngb+DLyjV5/LgfcmuRP4BZ0w\nC3BQs29Z3k9n3e5HgP+is473OVV1fpL1gB8mObiqnqSzrGH9Zm0yVXVfkk8AVwEB/rOqLuvHuSVJ\nkoaVVPmX7cGQ5ErgHVX1wACM/WXg+qr6+sqMM2rMuBrzztOW3XEVmd911Go7lyRJWg2mr77PDyWZ\nVVWTl9XPmd9BUlX7D8S4SWYDDwF9fkGGJElSWxl+hxm/IEOSJKlvfuBNkiRJreHMr/o0YdPRdJ9y\n8Go8o88VliRJA8uZX0mSJLWG4VeSJEmtYfiVJElSaxh+JUmS1BqGX0mSJLWG4VeSJEmtYfiVJElS\naxh+JUmS1BqGX0mSJLWG4VeSJEmtYfiVJElSaxh+JUmS1BqGX0mSJLWG4VeSJEmtYfiVJElSa4wc\n7AI0hP3+Vpg+erCrkCRJa6rpCwe7ghdx5leSJEmtYfiVJElSaxh+JUmS1BprfPhNsijJ7CS3Jfle\nkpeuwBh/m+SkZnuTJDcmuTXJtCT/N8nLl3H8mCRXJBmb5LZe+6Yn+dDy1rSM8+2V5NJ+9Pvzqjyv\nJEnSmm6ND7/AE1U1qa
rGA38B3ru8A1TVJVV1SvNyX2BeVe1QVTOq6g1V9fAyhjgQ+PHynleSJEmr\n13AIvz3NAP4aIMkPksxKcnuS4xZ3SHJgkluSzEny06bt2CRfSDIJ+N/Am5rZ5HWSzE+ycdPvHUnm\nNsee1+O8BwI/WlZxSSYluaEZ4+IkGzTtVyX5dJKbkvwyybSmvSvJOUnmNTPRey9hzBfMLDcz4GN7\n9XnBTHFzrccuq15JkqThZtg86izJSOAg4PKm6e+q6k9J1gFuTnIhnbD/VWDPqronyYY9x6iq2Un+\nJzC5qv6xGXfx+NsBHwd2r6oHFx+bZATw2qq6owmdr0kyu8ew/w34bLP9DeD4qro6yb8CnwRObPaN\nrKqdk7yhad8PeH+nrJqQZGvgiiRbrYK3S5IkqZWGQ/hdp0fYnAF8rdk+Icmbm+3NgXHAJsA1VXUP\nQFX9aTnOsw/wvap6sNexuwA39uj3m6qatPhFkunN79HAy6vq6mbX14Hv9Tjuoub3LGBss70H8Pnm\nfHcl+S0woOG3mSU/DuBVozOQp5IkSVrthkP4faJn2ITOn/npzJzuVlWPJ7kK6Bqg8/ecbV4ZTzW/\nF7F89+UZXrh8ZUnX2Z8+AFTVWcBZAJNfOaKWow5JkqQhb7it+V1sNPBQE3y3BnZt2m8A9kyyJUDv\nZQ/L8DPgrUk26nXsvsBPlnVwVS0EHlq8nhc4Brh6KYdAZyb76OZ8WwGvAn7Rq898YMemz47AlksY\n57fAtklGNU+u2HdZ9UqSJA1Hw2Hmd0kuB96b5E46YfEGgKr6Y/Nn/YuSrAX8Adi/PwNW1e1J/g24\nOski4NYk/ww8WVWP9rOudwJfbh7HdjfwrmX0/xJwZpJ5dGZvj62qpxavQ25cCLwjye10ll/8cgm1\n35vku8BtwD3Arf2sV5IkaVhJlX/ZXlFJ3g5s1uMxacPK5FeOqO7j1h3sMiRJ0ppq+sLVdqoks6pq\n8rL6DdeZ39Wiqr452DVIkiSp/4brml9JkiTpRQy/kiRJag2XPahvr9wBpncPdhWSJEmrjDO/kiRJ\nag3DryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJag3D\nryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSpNUYOdgEauubdv5Cx\nJ1022GVIWknzu44a7BIktdX0hYNdwYs48ytJkqTWMPxKkiSpNQy/kiRJao1hH36TLEoyu8fPSYNc\nz0lJjk4yPUkl+ese+05s2iYvx3h7Jbm0j32Tk5zRx775STZe/iuQJElac7XhA29PVNWkFTkwyciq\nemYV13MAcDgwDpgHHAF8qtn3VuD2VXGSpvZuoHtVjCdJkjQcDPuZ3770nPlsZkivaranJzkvyXXA\neUm6kpyTZF6SW5Ps3fQ7NskPk1yV5FdJPtlj7LcnuamZaf5KkhFN+/rA2lX1x6brD4A3NfteAywE\nHuwxzplJupPcnuTkHu0HJrkryS3AW3q09679uVnhJBsluaIZ62wgq/o9lSRJGuraEH7X6bXs4W39\nOGZbYL+qOhJ4P1BVNQE4Evh6kq6m387AocBE4K1NiN4GeBswtZlxXgQc3fTfD/hpj/M8AtybZDyd\nGeALetXxsaqa3Iz/uiQTm3N/FfgbYCfgvy2l9p4+CVxbVdsBFwOv6sf7IEmSNKy47GHJLqmqJ5rt\nPYDPA1TVXUl+C2zV7LuyqhYAJLmo6fsMnVB6cxKAdYA/NP0PBM7pda7z6QTfA4B9gXf12Hd4kuPo\n3KcxdILtWsA9VfWr5rzfBI7ro/ae9qSZJa6qy5I8tKQLb853HMCI9TdZUhdJkqQ1VhvCb1+e4fmZ\n765e+x7r5xi1hNcBvl5VH1lC/52B9/VquxT4DNBdVY80gZkkWwIfAqZU1UNJzl1CnUvS
39qXqKrO\nAs4CGDVmXO/rkyRJWqO1YdlDX+bTmaGFztKFvsygWbaQZCs6ywV+0ezbP8mGSdYBDgGuo7Os4bAk\nf9Ucs2GSLZJsB9xVVYt6Dl5VjwMfBv6t13nXpxNkFyZ5BXBQ034XMLZZIwydpRj9cQ1wVFPTQcAG\n/TxOkiRp2GjDzO86SWb3eH15VZ0EnAx8Lcn/Aq5ayvFfAs5MMo/ObPGxVfVUM0N7E3AhsBnwzebp\nCiT5OHBFkrWAp+msG94DuHxJJ6iq85fQNifJrXTC7r10gjVV9WSzNOGyJI/TCefr9eN9OBn4TpLb\ngZnA7/pxjCRJ0rCSKv+yvSKSHAtMrqp/7Gf/K4F3VNUDA1rYKjRqzLga887TBrsMSStpftdRg12C\npLaavnC1nSrJrOZBAUvVhpnfIaGq9h/sGiRJktrO8LuCqupc4NxBLkOSJEnLoc0feJMkSVLLOPOr\nPk3YdDTdpxw82GVIWmmrb82dJA11zvxKkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSpNQy/\nkiRJag3DryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJag3DryRJklrD8CtJkqTWMPxKkiSpNQy/kiRJ\nag3DryRJklpj5GAXoKFr3v0LGXvSZYNdhrTGmt911GCXIEmDa/rCwa7gRZz5lSRJUmsYfiVJktQa\nhl9JkiS1huFXkiRJrTFkwm+SQ5JUkq0HaPzJSc5YieOPSPKxJK9IcmmSOUnuSPJ/V3Gdi5LMTnJb\nku8leekKjjM9yYdWZW2SJElruiETfoEjgWub36tUkpFV1V1VJ6zEMAcBlwP/ClxZVdtX1bbASauk\nyOc9UVWTqmo88Bfgvat4fEmSpNYaEuE3ybrAHsC7gSOatr2SXJ3kh0nuTnJKkqOT3JRkXpLXNP02\nSXJhkpubn6lN+/Qk5yW5DjivGe/SxedLck4zztwkhzbtZybpTnJ7kpN71BdgEnALMAa4b/G+qprb\nY8yfJrmlGfdNPY7/YDOTe1uSE5fjrZkB/HUzxg+SzGpqO67H2H/usX1YknOX8P5OSnJDc60XJ9lg\nOWqQJEkaNoZE+AXeBFxeVb8EFiTZqWnfns7M5zbAMcBWVbUzcDZwfNPndODUqpoCHNrsW2xbYL+q\n6j2b/AlgYVVNqKqJwM+a9o9V1WRgIvC6JBOb9h2AOVVVwBeBryX5ebMM4pVNnyeBN1fVjsDewOfS\nsRPwLmAXYFfg75PssKw3JMlIOrPN85qmv6uqnYDJwAlJNlrWGD18A/hwc63zgE8u5bzHNf8A6F70\n+NB7Np8kSdLKGCrh90jg/Gb7fJ5f+nBzVT1QVU8BvwGuaNrnAWOb7f2ALySZDVwCrN/MJANcUlVP\nLOF8+9EJsQBU1UPN5uFJbgFuBbajE54BDgR+1PT9MfBq4KvA1sCtSTYBAvx/SeYCPwE2BV5BZ0b7\n4qp6rKr+DFwETFvKe7FOcy3dwO+ArzXtJySZA9wAbA6MW8oYz0kyGnh5VV3dNH0d2LOv/lV1VlVN\nrqrJI146uj+nkCRJWmMM+je8JdkQ2AeYkKSAEUABlwFP9ej6bI/Xz/J87WsBu1bVk73GBXhsOerY\nEvgQMKWqHmqWD3Q1u19PZ1YZgKr6E/Bt4NvNUoo9gfWATYCdqurpJPN7HL88nqiqSb1q24tOYN+t\nqh5PclWPsatH1xU5nyRJUmsMhZnfw4DzqmqLqhpbVZsD97D02dGeruD5JRAkmbSUvotdCby/xzEb\nAOvTCcsLk7yCzpKDxTOnI6tqQfN6n8VPYEiyHvAaOjO0o4E/NMF3b2CLZvgZwCFJXprkZcCbm7bl\nMRp4qAm+W9NZPrHYfyXZJslazdgvUFULgYeSLH4/jwGu7t1PkiSpDYZC+D0SuLhX24X0/6kPJwCT\nmw9z3UH/no7wKWCD5gNoc4C9q2oOneUOd9GZ1b2u
6bs/nWUMi+0EdDfLG64Hzq6qm4FvNXXMA97R\njENV3QKcC9wE3Nj0v7Wf17bY5cDIJHcCp9BZ+rDYScClwEzggT6OfyfwmabmSXSeWCFJktQ66XyG\nS31JcjadwHrDMjsPM6PGjKsx7zxtsMuQ1ljzu44a7BIkaXBNX30fnk8yq3lwwVIN+prfoa6q3jPY\nNUiSJGnVMPwOguYxZT9dwq59F68tliRJ0qpn+B0ETcDtzwfzJEmStAoZftWnCZuOpvuUgwe7DGkN\n5hfFSNJQMxSe9iBJkiStFoZfSZIktYbhV5IkSa1h+JUkSVJrGH4lSZLUGoZfSZIktYbhV5IkSa2R\nqhrsGjREJXkU+MVg16Fl2hh4cLCL0DJ5n9YM3qc1h/dqzbA679MWVbXJsjr5JRdaml9U1eTBLkJL\nl6Tb+zT0eZ/WDN6nNYf3as0wFO+Tyx4kSZLUGoZfSZIktYbhV0tz1mAXoH7xPq0ZvE9rBu/TmsN7\ntWYYcvfJD7xJkiSpNZz5lSRJUmsYflsuyYFJfpHk10lOWsL+JDmj2T83yY6DUWfb9eM+Hd3cn3lJ\nZibZfjDq1LLvVY9+U5I8k+Sw1VmfOvpzn5LslWR2ktuTXL26a1S//t83Osl/JpnT3Kd3DUadbZfk\nP5L8IcltfewfUlnC8NtiSUYAXwQOArYFjkyyba9uBwHjmp/jgDNXa5Hq7326B3hdVU0A/hdDcI1V\nG/TzXi3u92ngitVboaB/9ynJy4EvAX9bVdsBb13thbZcP/97ej9wR1VtD+wFfC7J2qu1UAGcCxy4\nlP1DKksYftttZ+DXVXV3Vf0FOB94U68+bwK+UR03AC9PMmZ1F9pyy7xPVTWzqh5qXt4AbLaaa1RH\nf/6bAjgeuBD4w+osTs/pz306Crioqn4HUFXeq9WvP/epgPWSBFgX+BPwzOotU1V1DZ33vi9DKksY\nftttU+DeHq/va9qWt48G1vLeg3cDPxrQitSXZd6rJJsCb8a/ogym/vw3tRWwQZKrksxK8o7VVp0W\n6899+gKwDfB7YB7wP6rq2dVTnpbDkMoSfsObNIwk2ZtO+N1jsGtRn04DPlxVz3YmqzREjQR2AvYF\n1gGuT3JDVf1ycMtSLwcAs4F9gNcAVyaZUVWPDG5ZGsoMv+12P7B5j9ebNW3L20cDq1/3IMlE4Gzg\noKpasJpq0wv1515NBs5vgu/GwBuSPFNVP1g9JYr+3af7gAVV9RjwWJJrgO0Bw+/q05/79C7glOo8\nt/XXSe4BtgZuWj0lqp+GVJZw2UO73QyMS7Jl8wGBI4BLevW5BHhH80nNXYGFVfXA6i605ZZ5n5K8\nCrgIOMaZqUG1zHtV/397d6gSURCFcfz/oRargnVNvoBg8Ql8EhGfQYPvIAYRm8VikfU1FNkiBjGa\nTYvHcDcapnh3Yf6/esuBwx2+OQwzVbtVNamqCXAPnBh8R9ey9j0Ah0nWk2wCB8Bs5Dp719KnD4bp\nPEl2gD3gfdQq1WKlsoST345V1TzJKfAErAE3VfWa5Hjx/Qp4BI6AN+CbYZetETX26QzYAi4XE8V5\nVe0vq+ZeNfZKS9bSp6qaJZkCz8APcF1Vf17jpP/R+D9dALdJXoAwHCn6WlrRnUpyx3DbxnaST+Ac\n2IDVzBK+8CZJkqRueOxBkiRJ3TD8SpIkqRuGX0mSJHXD8CtJkqRuGH4lSZLUDcOvJEmSumH4lSRJ\nUjcMv5IkSerGL4UNZxNmk4DXAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11c402550>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "normed_subset = count_subset.div(count_subset.sum(1), axis=0)\n",
    "print(type(normed_subset))\n",
    "normed_subset.plot(kind='barh', stacked=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MovieLens 1M data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "encoding = 'latin1'\n",
    "\n",
    "upath = os.path.expanduser('ch02/movielens/users.dat')\n",
    "rpath = os.path.expanduser('ch02/movielens/ratings.dat')\n",
    "mpath = os.path.expanduser('ch02/movielens/movies.dat')\n",
    "\n",
    "unames = ['user_id', 'gender', 'age', 'occupation', 'zip']\n",
    "rnames = ['user_id', 'movie_id', 'rating', 'timestamp']\n",
    "mnames = ['movie_id', 'title', 'genres']\n",
    "\n",
    "# '::' is a multi-character separator, which only the Python parsing engine\n",
    "# supports. Request it explicitly instead of relying on pandas' implicit\n",
    "# fallback, which emits a ParserWarning. header=None: the column names come\n",
    "# from the *names lists above rather than from the files.\n",
    "users = pd.read_csv(upath, sep='::', header=None, names=unames,\n",
    "                    encoding=encoding, engine='python')\n",
    "ratings = pd.read_csv(rpath, sep='::', header=None, names=rnames,\n",
    "                      encoding=encoding, engine='python')\n",
    "movies = pd.read_csv(mpath, sep='::', header=None, names=mnames,\n",
    "                     encoding=encoding, engine='python')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "users[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "movies[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = pd.merge(pd.merge(ratings, users), movies)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# First row of the merged frame, selected by position (.ix is deprecated)\n",
    "data.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "mean_ratings = data.pivot_table('rating', index='title',\n",
    "                                columns='gender', aggfunc='mean')\n",
    "mean_ratings[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings_by_title = data.groupby('title').size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ratings_by_title[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "active_titles = ratings_by_title.index[ratings_by_title >= 250]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "active_titles[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only the titles in active_titles; .loc is the label-based\n",
    "# replacement for the deprecated .ix indexer.\n",
    "mean_ratings = mean_ratings.loc[active_titles]\n",
    "mean_ratings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "mean_ratings = mean_ratings.rename(index={'Seven Samurai (The Magnificent Seven) (Shichinin no samurai) (1954)':\n",
    "                           'Seven Samurai (Shichinin no samurai) (1954)'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Sort by the mean rating in column 'F', highest first.\n",
    "# sort_values replaces the removed sort_index(by=...) spelling.\n",
    "top_female_ratings = mean_ratings.sort_values(by='F', ascending=False)\n",
    "top_female_ratings[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Measuring rating disagreement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "mean_ratings['diff'] = mean_ratings['M'] - mean_ratings['F']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Ascending sort on 'diff' (M minus F), so the most negative differences\n",
    "# come first. sort_values replaces the removed sort_index(by=...) spelling.\n",
    "sorted_by_diff = mean_ratings.sort_values(by='diff')\n",
    "sorted_by_diff[:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Reverse order of rows, take first 15 rows\n",
    "sorted_by_diff[::-1][:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Standard deviation of rating grouped by title\n",
    "rating_std_by_title = data.groupby('title')['rating'].std()\n",
    "# Filter down to active_titles (.loc replaces the deprecated .ix indexer)\n",
    "rating_std_by_title = rating_std_by_title.loc[active_titles]\n",
    "# Order Series by value in descending order\n",
    "# (Series.order() was removed from pandas; sort_values is its replacement)\n",
    "rating_std_by_title.sort_values(ascending=False)[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## US Baby Names 1880-2010"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import division\n",
    "from numpy.random import randn\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rc('figure', figsize=(12, 5))\n",
    "np.set_printoptions(precision=4)\n",
    "%pwd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data source (US Social Security Administration): http://www.ssa.gov/oact/babynames/limits.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "!head -n 10 ch02/names/yob1880.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "names1880 = pd.read_csv('ch02/names/yob1880.txt', names=['name', 'sex', 'births'])\n",
    "names1880"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "names1880.groupby('sex').births.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 2010 is the last available year right now\n",
    "years = range(1880, 2011)\n",
    "\n",
    "pieces = []\n",
    "columns = ['name', 'sex', 'births']\n",
    "\n",
    "# Read one file per year and tag every row with the year it came from.\n",
    "# NOTE(review): this loop rebinds the notebook-level name `path`, which the\n",
    "# first section of the notebook also assigns.\n",
    "for year in years:\n",
    "    path = 'ch02/names/yob%d.txt' % year\n",
    "    # Column names are supplied explicitly via names=\n",
    "    frame = pd.read_csv(path, names=columns)\n",
    "\n",
    "    frame['year'] = year\n",
    "    pieces.append(frame)\n",
    "\n",
    "# Concatenate everything into a single DataFrame\n",
    "names = pd.concat(pieces, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Total births per year, one column per sex. The string alias 'sum' matches\n",
    "# the other pivot_table calls in this notebook and avoids the FutureWarning\n",
    "# newer pandas emits when the builtin `sum` is passed as aggfunc.\n",
    "total_births = names.pivot_table('births', index='year',\n",
    "                                 columns='sex', aggfunc='sum')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "total_births.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "total_births.plot(title='Total births by sex and year')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def add_prop(group):\n",
    "    \"\"\"Return `group` with a `prop` column: each row's share of the group's births.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    group : DataFrame\n",
    "        Rows of a single group; must contain a `births` column.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame\n",
    "        A new frame equal to `group` plus `prop = births / births.sum()`.\n",
    "    \"\"\"\n",
    "    # Cast to float so the division is true division even for integer counts\n",
    "    births = group.births.astype(float)\n",
    "\n",
    "    # assign() builds a new frame instead of mutating the object handed in by\n",
    "    # groupby.apply; pandas documents in-place mutation there as unsupported.\n",
    "    return group.assign(prop=births / births.sum())\n",
    "\n",
    "# group_keys=False keeps the original flat row index instead of prepending\n",
    "# the (year, sex) keys, so the result has the same shape across pandas versions.\n",
    "names = names.groupby(['year', 'sex'], group_keys=False).apply(add_prop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.allclose(names.groupby(['year', 'sex']).prop.sum(), 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_top1000(group):\n",
    "    \"\"\"Return at most the 1000 rows of `group` with the largest `births`.\"\"\"\n",
    "    # sort_values replaces the removed sort_index(by=...) spelling\n",
    "    return group.sort_values(by='births', ascending=False)[:1000]\n",
    "\n",
    "grouped = names.groupby(['year', 'sex'])\n",
    "top1000 = grouped.apply(get_top1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Same top-1000 selection written as an explicit loop; concatenating with\n",
    "# ignore_index=True yields a flat 0..n-1 index.\n",
    "pieces = []\n",
    "# Each iteration yields a (year, sex) key tuple plus that group's rows\n",
    "for key, group in names.groupby(['year', 'sex']):\n",
    "    # sort_values replaces the removed sort_index(by=...) spelling\n",
    "    pieces.append(group.sort_values(by='births', ascending=False)[:1000])\n",
    "top1000 = pd.concat(pieces, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "top1000.index = np.arange(len(top1000))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "top1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Analyzing naming trends"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "boys = top1000[top1000.sex == 'M']\n",
    "girls = top1000[top1000.sex == 'F']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Births per year (rows) for every name in top1000 (columns). The string\n",
    "# alias 'sum' avoids the FutureWarning newer pandas emits for the builtin.\n",
    "total_births = top1000.pivot_table('births', index='year', columns='name',\n",
    "                                   aggfunc='sum')\n",
    "total_births"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subset = total_births[['John', 'Harry', 'Mary', 'Marilyn']]\n",
    "subset.plot(subplots=True, figsize=(12, 10), grid=False,\n",
    "            title=\"Number of births per year\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Measuring the increase in naming diversity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.figure()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Sum of `prop` per year and sex over the rows kept in top1000\n",
    "table = top1000.pivot_table('prop', index='year',\n",
    "                            columns='sex', aggfunc='sum')\n",
    "# Title names the frame actually plotted (top1000, not 'table1000')\n",
    "table.plot(title='Sum of top1000.prop by year and sex',\n",
    "           yticks=np.linspace(0, 1.2, 13), xticks=range(1880, 2020, 10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = boys[boys.year == 2010]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Running total of `prop`, largest values first.\n",
    "# sort_values replaces the removed sort_index(by=...) spelling.\n",
    "prop_cumsum = df.sort_values(by='prop', ascending=False).prop.cumsum()\n",
    "prop_cumsum[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "prop_cumsum.values.searchsorted(0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = boys[boys.year == 1900]\n",
    "# sort_values replaces the removed sort_index(by=...) spelling\n",
    "in1900 = df.sort_values(by='prop', ascending=False).prop.cumsum()\n",
    "# searchsorted returns a 0-based position, so add 1 to turn it into a count\n",
    "in1900.values.searchsorted(0.5) + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_quantile_count(group, q=0.5):\n",
    "    \"\"\"Count the top names needed to reach a cumulative `prop` of `q`.\n",
    "\n",
    "    Rows are ordered by `prop` descending and accumulated; the result is the\n",
    "    1-based position at which the running total first reaches `q`.\n",
    "    \"\"\"\n",
    "    # sort_values replaces the removed sort_index(by=...) spelling\n",
    "    group = group.sort_values(by='prop', ascending=False)\n",
    "    return group.prop.cumsum().values.searchsorted(q) + 1\n",
    "\n",
    "# One count per (year, sex) group, then pivot sex into columns\n",
    "diversity = top1000.groupby(['year', 'sex']).apply(get_quantile_count)\n",
    "diversity = diversity.unstack('sex')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_quantile_count(group, q=0.5):\n",
    "    \"\"\"Count the top names needed to reach a cumulative `prop` of `q`.\n",
    "\n",
    "    Rows are ordered by `prop` descending and accumulated; the result is the\n",
    "    1-based position at which the running total first reaches `q`.\n",
    "    \"\"\"\n",
    "    # sort_values replaces the removed sort_index(by=...) spelling\n",
    "    group = group.sort_values(by='prop', ascending=False)\n",
    "    return group.prop.cumsum().values.searchsorted(q) + 1\n",
    "\n",
    "# One count per (year, sex) group, then pivot sex into columns\n",
    "diversity = top1000.groupby(['year', 'sex']).apply(get_quantile_count)\n",
    "diversity = diversity.unstack('sex')\n",
    "diversity.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "diversity.plot(title=\"Number of popular names in top 50%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The \"Last letter\" Revolution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# extract last letter from name column\n",
    "def get_last_letter(x):\n",
    "    \"\"\"Return the final character of the string `x`.\"\"\"\n",
    "    return x[-1]\n",
    "\n",
    "last_letters = names.name.map(get_last_letter)\n",
    "# Naming the Series labels the resulting row index of the pivot table\n",
    "last_letters.name = 'last_letter'\n",
    "\n",
    "# Births summed per last letter (rows) and per (sex, year) (columns); the\n",
    "# string alias 'sum' avoids the FutureWarning newer pandas emits for the builtin.\n",
    "table = names.pivot_table('births', index=last_letters,\n",
    "                          columns=['sex', 'year'], aggfunc='sum')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subtable = table.reindex(columns=[1910, 1960, 2010], level='year')\n",
    "subtable.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subtable.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "letter_prop = subtable / subtable.sum().astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "fig, axes = plt.subplots(2, 1, figsize=(10, 8))\n",
    "letter_prop['M'].plot(kind='bar', rot=0, ax=axes[0], title='Male')\n",
    "letter_prop['F'].plot(kind='bar', rot=0, ax=axes[1], title='Female',\n",
    "                      legend=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.subplots_adjust(hspace=0.25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Normalize every (sex, year) column so it sums to 1\n",
    "letter_prop = table / table.sum().astype(float)\n",
    "\n",
    "# Rows 'd', 'n', 'y' of the 'M' columns, transposed so the selected letters\n",
    "# become columns. .loc is the label-based replacement for the deprecated .ix.\n",
    "dny_ts = letter_prop.loc[['d', 'n', 'y'], 'M'].T\n",
    "dny_ts.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.close('all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dny_ts.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Boy names that became girl names (and vice versa)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Distinct names appearing anywhere in top1000\n",
    "all_names = top1000.name.unique()\n",
    "# Boolean mask: True where the lower-cased name contains the substring 'lesl'\n",
    "mask = np.array(['lesl' in x.lower() for x in all_names])\n",
    "lesley_like = all_names[mask]\n",
    "lesley_like"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "filtered = top1000[top1000.name.isin(lesley_like)]\n",
    "filtered.groupby('name').births.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Births per year (rows) and sex (columns) for the rows in `filtered`\n",
    "table = filtered.pivot_table('births', index='year',\n",
    "                             columns='sex', aggfunc='sum')\n",
    "# Divide each row by its own total so the values in a year sum to 1\n",
    "table = table.div(table.sum(1), axis=0)\n",
    "table.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plt.close('all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "table.plot(style={'M': 'k-', 'F': 'k--'})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
